/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

/* Byte offset of page_list entry x (8 bytes per 64-bit pointer slot). */
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
 * ~ control_page + PAGE_SIZE are used as data storage and stack for
 * jumping back
 */
#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

/* Minimal CPU state, saved in the control page for the return trip */
#define RSP DATA(0x0)
#define CR0 DATA(0x8)
#define CR3 DATA(0x10)
#define CR4 DATA(0x18)

/* other data */
#define CP_PA_TABLE_PAGE DATA(0x20)
#define CP_PA_SWAP_PAGE DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)

	.text
	.align PAGE_SIZE
	.code64
SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * Called from C with the SysV argument registers:
	 * %rdi indirection_page
	 * %rsi page_list
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8 host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/* Stash RSP/CR0/CR3/CR4 in the control page's data area */
	movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq %rsp, RSP(%r11)
	movq %cr0, %rax
	movq %rax, CR0(%r11)
	movq %cr3, %rax
	movq %rax, CR3(%r11)
	movq %cr4, %rax
	movq %rax, CR4(%r11)

	/* Save CR4. Required to enable the right paging mode later. */
	movq %rax, %r13

	/* zero out flags, and disable interrupts */
	pushq $0
	popfq

	/* Save SME active flag */
	movq %r8, %r12

	/*
	 * get physical address of control page now
	 * this is impossible after page table switch
	 */
	movq PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* get physical address of page table now too */
	movq PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get physical address of swap page now */
	movq PTR(PA_SWAP_PAGE)(%rsi), %r10

	/* save some information for jumping back */
	movq %r9, CP_PA_TABLE_PAGE(%r11)
	movq %r10, CP_PA_SWAP_PAGE(%r11)
	movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

	/* Switch to the identity mapped page tables */
	movq %r9, %cr3

	/* setup a new stack at the end of the physical control page */
	lea PAGE_SIZE(%r8), %rsp

	/*
	 * jump to identity mapped page: push the physical address of
	 * identity_mapped (control page PA + its offset within this blob)
	 * and RET to it, so execution continues in the identity mapping.
	 */
	addq $(identity_mapped - relocate_kernel), %r8
	pushq %r8
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(relocate_kernel)

/*
 * Runs from the identity-mapped copy of the control page.
 * Live registers on entry (set up by relocate_kernel above):
 *   %rdx start address, %rcx preserve_context, %r8 control page PA,
 *   %r9 identity page-table PA, %r10 swap page PA, %r12 SME flag,
 *   %r13 original CR4.
 */
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/* set return address to 0 if not preserving context */
	pushq $0
	/* store the start address on the stack */
	pushq %rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq %cr4, %rax
	andq $~(X86_CR4_CET), %rax
	movq %rax, %cr4

	/*
	 * Set cr0 to a known state:
	 * - Paging enabled
	 * - Alignment check disabled
	 * - Write protect disabled
	 * - No task switch
	 * - Don't do FP software emulation.
	 * - Protected mode enabled
	 */
	movq %cr0, %rax
	andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl $(X86_CR0_PG | X86_CR0_PE), %eax
	movq %rax, %cr0

	/*
	 * Set cr4 to a known state:
	 * - physical address extension enabled
	 * - 5-level paging, if it was enabled before
	 * - Machine check exception on TDX guest, if it was enabled before.
	 *   Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq %r13, %cr4

	/* Flush the TLB (needed?) */
	movq %r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq %r12, %r12
	jz .Lsme_off
	wbinvd
.Lsme_off:

	/* Keep preserve_context in %r11; %rcx is clobbered by swap_pages */
	movq %rcx, %r11
	call swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq %cr3, %rax
	movq %rax, %cr3

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	testq %r11, %r11
	jnz .Lrelocate
	/* Not preserving context: scrub GPRs and RET to the start address
	 * (pushed onto the stack at function entry). */
	xorl %eax, %eax
	xorl %ebx, %ebx
	xorl %ecx, %ecx
	xorl %edx, %edx
	xorl %esi, %esi
	xorl %edi, %edi
	xorl %ebp, %ebp
	xorl %r8d, %r8d
	xorl %r9d, %r9d
	xorl %r10d, %r10d
	xorl %r11d, %r11d
	xorl %r12d, %r12d
	xorl %r13d, %r13d
	xorl %r14d, %r14d
	xorl %r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	/* preserve_context path: call the new image so it can return here */
	popq %rdx
	leaq PAGE_SIZE(%r10), %rsp
	ANNOTATE_RETPOLINE_SAFE
	call *%rdx

	/* get the re-entry point of the peer system */
	movq 0(%rsp), %rbp
	leaq relocate_kernel(%rip), %r8
	movq CP_PA_SWAP_PAGE(%r8), %r10
	movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq CP_PA_TABLE_PAGE(%r8), %rax
	movq %rax, %cr3
	lea PAGE_SIZE(%r8), %rsp
	/* Run the swap again to restore the original kernel's pages */
	call swap_pages
	movq $virtual_mapped, %rax
	pushq %rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

/*
 * Back in the original (virtual) mapping: restore the CPU state that
 * relocate_kernel saved (CR0/CR3/CR4, stack, callee-saved regs) and
 * return to the original caller. On entry %r8 = control page,
 * %rbp = peer re-entry point (returned in %rax).
 */
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq RSP(%r8), %rsp
	movq CR4(%r8), %rax
	movq %rax, %cr4
	movq CR3(%r8), %rax
	movq CR0(%r8), %r8
	movq %rax, %cr3
	movq %r8, %cr0
	movq %rbp, %rax

	/* Undo the pushes done at relocate_kernel entry */
	popf
	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbp
	popq %rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/*
	 * Do the copies. Walks the kexec indirection list in %rdi; each
	 * entry is a page address with low-bit flags: 0x1 destination,
	 * 0x2 indirection, 0x4 done, 0x8 source. Pages are exchanged
	 * (not just copied) via the swap page in %r10 so the preserve-
	 * context path can undo the operation. Clobbers %rax, %rbx,
	 * %rcx, %rdx, %rsi, %rdi.
	 */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	movq %rdi, %rcx /* Put the page_list in %rcx */
	xorl %edi, %edi
	xorl %esi, %esi
	jmp 1f

0: /* top, read another word for the indirection page */

	movq (%rbx), %rcx
	addq $8, %rbx
1:
	testb $0x1, %cl /* is it a destination page? */
	jz 2f
	movq %rcx, %rdi
	andq $0xfffffffffffff000, %rdi /* strip flag bits -> page address */
	jmp 0b
2:
	testb $0x2, %cl /* is it an indirection page? */
	jz 2f
	movq %rcx, %rbx
	andq $0xfffffffffffff000, %rbx
	jmp 0b
2:
	testb $0x4, %cl /* is it the done indicator? */
	jz 2f
	jmp 3f
2:
	testb $0x8, %cl /* is it the source indicator? */
	jz 0b /* Ignore it otherwise */
	movq %rcx, %rsi /* For ever source page do a copy */
	andq $0xfffffffffffff000, %rsi

	/* Three-way exchange through the swap page:
	 * source -> swap, dest -> source, swap -> dest.
	 * 512 qwords = PAGE_SIZE per rep movsq. */
	movq %rdi, %rdx /* Save destination page address */
	movq %rsi, %rax /* Save source page address */

	movq %r10, %rdi /* copy source page to swap page */
	movl $512, %ecx
	rep ; movsq

	movq %rax, %rdi /* copy destination page to source page */
	movq %rdx, %rsi
	movl $512, %ecx
	rep ; movsq

	movq %rdx, %rdi /* copy swap page to destination page */
	movq %r10, %rsi
	movl $512, %ecx
	rep ; movsq

	lea PAGE_SIZE(%rax), %rsi /* advance past the consumed source page */
	jmp 0b
3:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

	/* Pad the control-code blob to its fixed size with int3 (0xcc) */
	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
SYM_CODE_END(relocate_range);