/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 *
 * Overview of the routines in this file:
 *   relocate_kernel  - entry point; saves CPU state into the control page,
 *                      switches to the identity-mapped page tables and a
 *                      new stack, then transfers to identity_mapped.
 *   identity_mapped  - normalizes CR0/CR4, runs swap_pages, then either
 *                      jumps to the start address or (preserve_context)
 *                      calls it and prepares the way back.
 *   virtual_mapped   - restores the state saved by relocate_kernel and
 *                      returns to relocate_kernel's caller.
 *   swap_pages       - walks the indirection list and exchanges pages.
 *
 * The code addresses its data area as relocate_kernel + DATA(offset), and
 * relocate_kernel jumps to (physical control page) + (identity_mapped -
 * relocate_kernel). Both only work if this code has been placed at the
 * very start of the control page; that copy is not done in this file
 * (presumably by the C caller - verify there).
 */

/* Byte offset of 8-byte slot number x in the page_list array (%rsi). */
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
 * ~ control_page + PAGE_SIZE are used as data storage and stack for
 * jumping back
 */
#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

/* Minimal CPU state */
#define RSP DATA(0x0)
#define CR0 DATA(0x8)
#define CR3 DATA(0x10)
#define CR4 DATA(0x18)

/* other data */
#define CP_PA_TABLE_PAGE DATA(0x20)
#define CP_PA_SWAP_PAGE DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)

	.text
	.align PAGE_SIZE
	.code64
/*
 * relocate_range brackets everything from here to the padding at the end
 * of the file; relocate_kernel starts at the same address.
 */
SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi page_list
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8 host_mem_enc_active
	 */

	/*
	 * Save the CPU context, used for jumping back.
	 * virtual_mapped pops these in exactly the reverse order.
	 */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	/*
	 * %r11 = page_list[VA_CONTROL_PAGE]: the control page as addressable
	 * under the page tables that are still active here. Record the stack
	 * pointer and control registers in its data area.
	 */
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq	%rsp, RSP(%r11)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%cr4, %rax
	movq	%rax, CR4(%r11)

	/*
	 * Save CR4. Required to enable the right paging mode later.
	 * (%rax still holds the CR4 value read just above.)
	 */
	movq	%rax, %r13

	/* zero out flags, and disable interrupts */
	pushq	$0
	popfq

	/*
	 * Save SME active flag (host_mem_enc_active); %r8 is reused for the
	 * control page address right below.
	 */
	movq	%r8, %r12

	/*
	 * get physical address of control page now
	 * this is impossible after page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get physical address of swap page now */
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10

	/*
	 * save some information for jumping back
	 * (reloaded by identity_mapped after the called image returns)
	 */
	movq	%r9, CP_PA_TABLE_PAGE(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

	/* Switch to the identity mapped page tables */
	movq	%r9, %cr3

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp

	/*
	 * jump to identity mapped page
	 *
	 * The target is computed as control page + offset of identity_mapped
	 * and reached with push + RET rather than an indirect jump.
	 *
	 * Registers live across the transfer:
	 *   %rdi indirection_page    %rdx start address
	 *   %rcx preserve_context    %r9  PA of page table
	 *   %r10 PA of swap page     %r12 SME active flag
	 *   %r13 original CR4
	 */
	addq	$(identity_mapped - relocate_kernel), %r8
	pushq	%r8
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(relocate_kernel)

/*
 * identity_mapped - runs on the control page stack set up by
 * relocate_kernel, with the register contents listed at the end of
 * relocate_kernel.
 *
 * Without preserve_context this never comes back: it clears the general
 * purpose registers and RETs into the start address. With
 * preserve_context it CALLs the start address and, if that returns,
 * swaps the pages again and continues in virtual_mapped.
 */
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/* set return address to 0 if not preserving context */
	pushq	$0
	/* store the start address on the stack */
	pushq	%rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 *
	 * NOTE(review): the andl below masks MCE out of %r13d and the orl then
	 * sets it, so where the alternative is applied MCE ends up set
	 * regardless of its previous value - confirm that this matches the
	 * "if it was enabled before" wording above.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r12, %r12
	jz	.Lsme_off
	wbinvd
.Lsme_off:

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11
	/* swap_pages takes the indirection page in %rdi, swap page in %r10 */
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	/* %r11 = preserve_context; non-zero takes the call-and-return path */
	testq	%r11, %r11
	jnz	.Lrelocate
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	/*
	 * RET pops the start address pushed at the top of identity_mapped
	 * and jumps to it; the 0 pushed before it is then on top of the
	 * stack, in the slot where a return address would be.
	 */
	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	/* %rdx = start address pushed at the top of identity_mapped */
	popq	%rdx
	/* run the called image on a stack at the end of the swap page */
	leaq	PAGE_SIZE(%r10), %rsp
	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/*
	 * get the re-entry point of the peer system
	 * (read from the top of the stack as left by the callee; it ends up
	 * in %rax as virtual_mapped's return value)
	 */
	movq	0(%rsp), %rbp
	/*
	 * No register contents are relied on from here: locate the control
	 * page RIP-relatively and reload everything from its data area.
	 */
	leaq	relocate_kernel(%rip), %r8
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	%rax, %cr3
	/* back onto the stack at the end of the control page */
	lea	PAGE_SIZE(%r8), %rsp
	/* walk the same indirection list again, exchanging the pages back */
	call	swap_pages
	/*
	 * Continue at the link-time (absolute) address of virtual_mapped
	 * rather than at the copy in the control page, again via push + RET.
	 * NOTE(review): this relies on that address being mapped by the page
	 * tables loaded from CP_PA_TABLE_PAGE - set up outside this file.
	 */
	movq	$virtual_mapped, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

/*
 * virtual_mapped - final leg of the preserve_context return path.
 *
 * In:  %r8  = control page base as computed in identity_mapped
 *      %rbp = value to hand back to relocate_kernel's caller
 * Restores %rsp, CR4, CR3 and CR0 from the data area, then unwinds the
 * frame pushed at the start of relocate_kernel and returns with
 * %rax = %rbp.
 */
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	RSP(%r8), %rsp
	movq	CR4(%r8), %rax
	movq	%rax, %cr4
	movq	CR3(%r8), %rax
	/*
	 * Loading the saved CR0 overwrites %r8, the data area pointer, so
	 * both values are fetched before either control register is written.
	 */
	movq	CR0(%r8), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0
	/* return value: the re-entry point picked up in identity_mapped */
	movq	%rbp, %rax

	/* undo the pushes from the start of relocate_kernel, in reverse */
	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/*
	 * Do the copies
	 *
	 * swap_pages - walk the indirection list and exchange the contents
	 * of every source page with its destination page.
	 *
	 * In:  %rdi = first list entry (the indirection_page argument)
	 *      %r10 = page used as bounce buffer (swap page)
	 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi and the flags.
	 *
	 * Each entry is a 64-bit word: the low 12 bits are masked off to get
	 * a page address, and these low bits select what the entry means:
	 *   0x1 destination page - becomes the current destination (%rdi)
	 *   0x2 indirection page - further entries are read from it (%rbx)
	 *   0x4 done             - return
	 *   0x8 source page      - exchange it with the current destination
	 * Entries with none of these bits set are skipped.
	 *
	 * %rbx is only loaded by an indirection entry, and every path back to
	 * label 0 reads through it.
	 * NOTE(review): this assumes the first entry has the 0x2 bit set -
	 * confirm against the caller.
	 *
	 * Each copy moves 512 quadwords (4096 bytes) with rep movsq, which
	 * assumes the direction flag is clear.
	 */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	1f

0:	/* top, read another word for the indirection page */

	movq	(%rbx), %rcx
	addq	$8, %rbx
1:
	testb	$0x1, %cl	/* is it a destination page? */
	jz	2f
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	0b
2:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	2f
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	0b
2:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	2f
	jmp	3f
2:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	0b		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	/* copy source page to swap page */
	movq	%r10, %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/*
	 * copy swap page to destination page
	 * (rep movsq leaves %rdi just past the destination page, so a
	 * following source entry pairs with the next page up unless a new
	 * destination entry comes first)
	 */
	movq	%rdx, %rdi
	movq	%r10, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* %rsi = one page past the source page just handled */
	lea	PAGE_SIZE(%rax), %rsi
	jmp	0b
3:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

	/*
	 * Pad with 0xcc bytes (the int3 opcode) so that the code starting at
	 * relocate_kernel occupies exactly KEXEC_CONTROL_CODE_MAX_SIZE bytes;
	 * the DATA() offsets begin right after it. The size expression goes
	 * negative if the code outgrows that budget.
	 */
	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
SYM_CODE_END(relocate_range);