xref: /linux/arch/x86/kernel/relocate_kernel_64.S (revision fcc680a647ba77370480fe753664cc10d572b240)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * relocate_kernel.S - put the kernel image in place to boot
4 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
5 */
6
7#include <linux/linkage.h>
8#include <linux/stringify.h>
9#include <asm/alternative.h>
10#include <asm/page_types.h>
11#include <asm/kexec.h>
12#include <asm/processor-flags.h>
13#include <asm/pgtable_types.h>
14#include <asm/nospec-branch.h>
15#include <asm/unwind_hints.h>
16#include <asm/asm-offsets.h>
17
18/*
19 * Must be relocatable PIC code callable as a C function; in particular,
20 * it must return with a plain RET and not a jump to the return thunk.
21 */
22
/*
 * PTR(x): byte offset of 8-byte slot number x (x * 8). Used below to index
 * the page_list array passed in %rsi.
 * NOTE(review): x is not parenthesized in the expansion, so only simple
 * constants or identifiers are safe as arguments.
 */
23#define PTR(x) (x << 3)
/*
 * PAGE_ATTR: OR of the _PAGE_PRESENT, _PAGE_RW, _PAGE_ACCESSED and
 * _PAGE_DIRTY bits.
 * NOTE(review): not referenced anywhere in the visible part of this file -
 * confirm whether it is still needed.
 */
24#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
25
26/*
27 * The range from control_page + KEXEC_CONTROL_CODE_MAX_SIZE up to
28 * control_page + PAGE_SIZE is used as data storage and stack for
29 * jumping back.
30 */
/*
 * DATA(offset): offset of a data slot, counted from the base of the control
 * page; slots start KEXEC_CONTROL_CODE_MAX_SIZE bytes in. All slots below are
 * 8 bytes apart and are accessed with movq relative to a register holding the
 * base address.
 */
31#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
32
33/* Minimal CPU state */
/* Written by relocate_kernel, read back by virtual_mapped. */
34#define RSP			DATA(0x0)
35#define CR0			DATA(0x8)
36#define CR3			DATA(0x10)
37#define CR4			DATA(0x18)
38
39/* other data */
/*
 * Written by relocate_kernel, read back by identity_mapped after the called
 * image returns (preserve_context path). CP_PA_BACKUP_PAGES_MAP holds the
 * indirection_page argument (%rdi) of relocate_kernel.
 */
40#define CP_PA_TABLE_PAGE	DATA(0x20)
41#define CP_PA_SWAP_PAGE		DATA(0x28)
42#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
43
/*
 * Everything below is assembled as 64-bit code into .text, starting on a
 * PAGE_SIZE boundary.
 */
44	.text
45	.align PAGE_SIZE
46	.code64
/*
 * relocate_kernel - entry point; arguments are listed in the comment below.
 *
 * Saves the callee-saved registers and RFLAGS on the current stack, records
 * %rsp/CR0/CR3/CR4 and the table/swap/indirection page addresses in the
 * DATA() slots of the control page, loads CR3 with the table page taken from
 * page_list, moves the stack to the end of the physical control page and
 * RETs into identity_mapped at its address inside that page.
 *
 * Register state handed to identity_mapped (each is read there before being
 * overwritten):
 *   %rdi  indirection_page
 *   %rdx  start address
 *   %rcx  preserve_context
 *   %r9   physical address of the table page
 *   %r10  physical address of the swap page
 *   %r12  host_mem_enc_active
 *   %r13  CR4 value read on entry
 *
 * relocate_range is opened here as well; it is closed only by the
 * SYM_CODE_END(relocate_range) at the end of the file.
 */
47SYM_CODE_START_NOALIGN(relocate_range)
48SYM_CODE_START_NOALIGN(relocate_kernel)
49	UNWIND_HINT_END_OF_STACK
50	ANNOTATE_NOENDBR
51	/*
52	 * %rdi indirection_page
53	 * %rsi page_list
54	 * %rdx start address
55	 * %rcx preserve_context
56	 * %r8  host_mem_enc_active
57	 */
58
59	/* Save the CPU context, used for jumping back */
	/* The matching popf/popq sequence is at the end of virtual_mapped. */
60	pushq %rbx
61	pushq %rbp
62	pushq %r12
63	pushq %r13
64	pushq %r14
65	pushq %r15
66	pushf
67
	/*
	 * %r11 = page_list[VA_CONTROL_PAGE]; it is the base for the RSP/CR0/CR3/
	 * CR4 and CP_* data slots written below. %rsp is stored after the pushes
	 * above, so restoring it later exposes the saved registers again.
	 */
68	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
69	movq	%rsp, RSP(%r11)
70	movq	%cr0, %rax
71	movq	%rax, CR0(%r11)
72	movq	%cr3, %rax
73	movq	%rax, CR3(%r11)
74	movq	%cr4, %rax
75	movq	%rax, CR4(%r11)
76
77	/* Save CR4. Required to enable the right paging mode later. */
78	movq	%rax, %r13
79
80	/* zero out flags, and disable interrupts */
81	pushq $0
82	popfq
83
84	/* Save SME active flag */
	/* %r8 is reused for the control page address right below. */
85	movq	%r8, %r12
86
87	/*
88	 * get physical address of control page now
89	 * this is impossible after page table switch
90	 */
91	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
92
93	/* get physical address of page table now too */
94	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
95
96	/* get physical address of swap page now */
97	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
98
99	/* save some information for jumping back */
100	movq	%r9, CP_PA_TABLE_PAGE(%r11)
101	movq	%r10, CP_PA_SWAP_PAGE(%r11)
102	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
103
104	/* Switch to the identity mapped page tables */
105	movq	%r9, %cr3
106
107	/* setup a new stack at the end of the physical control page */
108	lea	PAGE_SIZE(%r8), %rsp
109
110	/* jump to identity mapped page */
	/*
	 * %r8 becomes the physical control page address plus the offset of
	 * identity_mapped from relocate_kernel; pushing it and executing RET
	 * transfers control there.
	 * NOTE(review): assumes this code has been copied to the start of the
	 * control page by the caller - confirm.
	 */
111	addq	$(identity_mapped - relocate_kernel), %r8
112	pushq	%r8
113	ANNOTATE_UNRET_SAFE
114	ret
115	int3
116SYM_CODE_END(relocate_kernel)
117
/*
 * identity_mapped - reached via RET from relocate_kernel, running on the
 * stack set up at the end of the physical control page.
 *
 * Reads on entry: %rdi (indirection_page), %rdx (start address),
 * %rcx (preserve_context), %r9 (table page), %r10 (swap page),
 * %r12 (host_mem_enc_active), %r13 (CR4 value saved in relocate_kernel).
 *
 * Puts CR0 and CR4 into a known state, runs swap_pages, then either
 *  - preserve_context == 0: zeroes the general-purpose registers (except
 *    %rsp) and RETs to the start address, or
 *  - preserve_context != 0: calls the start address on a stack at the end
 *    of the swap page and, when that call returns, runs swap_pages again
 *    and continues in virtual_mapped.
 */
118SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
119	UNWIND_HINT_END_OF_STACK
120	/* set return address to 0 if not preserving context */
121	pushq	$0
122	/* store the start address on the stack */
123	pushq   %rdx
	/* Stack is now: (%rsp) = start address, 8(%rsp) = 0. */
124
125	/*
126	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
127	 * below.
128	 */
129	movq	%cr4, %rax
130	andq	$~(X86_CR4_CET), %rax
131	movq	%rax, %cr4
132
133	/*
134	 * Set cr0 to a known state:
135	 *  - Paging enabled
136	 *  - Alignment check disabled
137	 *  - Write protect disabled
138	 *  - No task switch
139	 *  - Don't do FP software emulation.
140	 *  - Protected mode enabled
141	 */
142	movq	%cr0, %rax
143	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
144	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
145	movq	%rax, %cr0
146
147	/*
148	 * Set cr4 to a known state:
149	 *  - physical address extension enabled
150	 *  - 5-level paging, if it was enabled before
151	 *  - Machine check exception on TDX guest, if it was enabled before.
152	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
153	 *
154	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
155	 * PAE is always set in the original CR4.
156	 */
	/*
	 * NOTE(review): the mask below keeps only PAE and LA57 from the saved
	 * CR4, so MCE is dropped; the ALTERNATIVE then ORs MCE back in
	 * (presumably whenever X86_FEATURE_TDX_GUEST is set) without looking
	 * at whether it was set before - confirm that this matches the intent
	 * described above.
	 */
157	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
158	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
159	movq	%r13, %cr4
160
161	/* Flush the TLB (needed?) */
	/* %r9 still holds the table page address loaded in relocate_kernel. */
162	movq	%r9, %cr3
163
164	/*
165	 * If SME is active, there could be old encrypted cache line
166	 * entries that will conflict with the now unencrypted memory
167	 * used by kexec. Flush the caches before copying the kernel.
168	 */
169	testq	%r12, %r12
170	jz .Lsme_off
171	wbinvd
172.Lsme_off:
173
174	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
175	movq	%rcx, %r11
	/* swap_pages takes the list head in %rdi and the swap page in %r10. */
176	call	swap_pages
177
178	/*
179	 * To be certain of avoiding problems with self-modifying code
180	 * I need to execute a serializing instruction here.
181	 * So I flush the TLB by reloading %cr3 here, it's handy,
182	 * and not processor dependent.
183	 */
184	movq	%cr3, %rax
185	movq	%rax, %cr3
186
187	/*
188	 * set all of the registers to known values
189	 * leave %rsp alone
190	 */
191
	/* %r11 != 0 means preserve_context: take the call/return path instead. */
192	testq	%r11, %r11
193	jnz .Lrelocate
194	xorl	%eax, %eax
195	xorl	%ebx, %ebx
196	xorl    %ecx, %ecx
197	xorl    %edx, %edx
198	xorl    %esi, %esi
199	xorl    %edi, %edi
200	xorl    %ebp, %ebp
201	xorl	%r8d, %r8d
202	xorl	%r9d, %r9d
203	xorl	%r10d, %r10d
204	xorl	%r11d, %r11d
205	xorl	%r12d, %r12d
206	xorl	%r13d, %r13d
207	xorl	%r14d, %r14d
208	xorl	%r15d, %r15d
209
	/*
	 * RET pops the start address pushed at the top of this routine and
	 * jumps to it; the 0 pushed before it is then at the top of the stack.
	 */
210	ANNOTATE_UNRET_SAFE
211	ret
212	int3
213
214.Lrelocate:
	/*
	 * preserve_context path: fetch the start address, move the stack to
	 * the end of the swap page and call the start address, so the return
	 * address into this code is pushed onto that stack.
	 */
215	popq	%rdx
216	leaq	PAGE_SIZE(%r10), %rsp
217	ANNOTATE_RETPOLINE_SAFE
218	call	*%rdx
219
220	/* get the re-entry point of the peer system */
	/*
	 * NOTE(review): the value is taken from whatever the called image left
	 * at the top of its stack on return - confirm against the kexec jump
	 * protocol.
	 */
221	movq	0(%rsp), %rbp
	/*
	 * %r8 = address of relocate_kernel in the copy that is executing now
	 * (RIP-relative); the DATA() slots are read relative to it.
	 */
222	leaq	relocate_kernel(%rip), %r8
223	movq	CP_PA_SWAP_PAGE(%r8), %r10
224	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
225	movq	CP_PA_TABLE_PAGE(%r8), %rax
226	movq	%rax, %cr3
227	lea	PAGE_SIZE(%r8), %rsp
	/* Same list head and swap page as the first swap_pages call. */
228	call	swap_pages
	/*
	 * Push the absolute (link-time) address of virtual_mapped and RET to
	 * it. %r8 and %rbp stay live for virtual_mapped.
	 * NOTE(review): unlike the RIP-relative lea above this is not the copy
	 * in the control page, so the page tables loaded above must map that
	 * address - confirm.
	 */
229	movq	$virtual_mapped, %rax
230	pushq	%rax
231	ANNOTATE_UNRET_SAFE
232	ret
233	int3
234SYM_CODE_END(identity_mapped)
235
/*
 * virtual_mapped - tail of the preserve_context return path.
 *
 * Expects %r8 to hold the base used for the DATA() slots and %rbp the
 * re-entry point fetched in identity_mapped. Restores %rsp, CR4, CR3 and
 * CR0 from the values stored by relocate_kernel, reloads the GDT when
 * CONFIG_KEXEC_JUMP is set, pops RFLAGS and the callee-saved registers that
 * relocate_kernel pushed, and returns to relocate_kernel's caller with the
 * re-entry point in %rax.
 */
236SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
237	UNWIND_HINT_END_OF_STACK
238	ANNOTATE_NOENDBR // RET target, above
239	movq	RSP(%r8), %rsp
240	movq	CR4(%r8), %rax
241	movq	%rax, %cr4
	/*
	 * Both remaining values are fetched before CR3 is written; %r8 is
	 * overwritten with the saved CR0 and no longer holds the data base.
	 * NOTE(review): presumably done because the base address may not be
	 * mapped once the saved CR3 is active - confirm.
	 */
242	movq	CR3(%r8), %rax
243	movq	CR0(%r8), %r8
244	movq	%rax, %cr3
245	movq	%r8, %cr0
246
247#ifdef CONFIG_KEXEC_JUMP
248	/* Saved in save_processor_state. */
249	movq    $saved_context, %rax
250	lgdt    saved_context_gdt_desc(%rax)
251#endif
252
	/* Return value: the re-entry point kept in %rbp by identity_mapped. */
253	movq	%rbp, %rax
254
	/* Reverse order of the pushf/pushq sequence in relocate_kernel. */
255	popf
256	popq	%r15
257	popq	%r14
258	popq	%r13
259	popq	%r12
260	popq	%rbp
261	popq	%rbx
262	ANNOTATE_UNRET_SAFE
263	ret
264	int3
265SYM_CODE_END(virtual_mapped)
266
267	/* Do the copies */
/*
 * swap_pages - walk the indirection list and exchange page contents.
 *
 * In:  %rdi  first list entry (indirection_page)
 *      %r10  address of the swap page, used as scratch space
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, flags
 *
 * Each entry is a page address with flag bits in its low bits; the low 12
 * bits are masked off to obtain the address:
 *   0x1  destination: following copies target this page
 *   0x2  indirection: continue reading entries from this page
 *   0x4  done: return
 *   0x8  source: exchange this page with the current destination
 * Entries with none of these bits set are skipped. The bits are tested in
 * the order listed, and the first match wins.
 *
 * For a source entry the three "rep movsq" copies (512 quadwords each)
 * exchange the source and destination pages through the swap page. The last
 * copy leaves %rdi just past the destination page, so consecutive source
 * entries go to consecutive destination pages.
 *
 * NOTE(review): %rbx is only set by an indirection entry, so the first entry
 * is expected to be an indirection (or done) entry - confirm against the
 * caller. The copies also rely on the direction flag being clear.
 */
268SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
269	UNWIND_HINT_END_OF_STACK
270	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	/* No destination or source page yet; skip the load for the first entry. */
271	xorl	%edi, %edi
272	xorl	%esi, %esi
273	jmp	1f
274
2750:	/* top, read another word for the indirection page */
276
	/* %rcx = next entry, %rbx advances to the entry after it. */
277	movq	(%rbx), %rcx
278	addq	$8,	%rbx
2791:
280	testb	$0x1,	%cl   /* is it a destination page? */
281	jz	2f
282	movq	%rcx,	%rdi
283	andq	$0xfffffffffffff000, %rdi
284	jmp	0b
2852:
286	testb	$0x2,	%cl   /* is it an indirection page? */
287	jz	2f
288	movq	%rcx,   %rbx
289	andq	$0xfffffffffffff000, %rbx
290	jmp	0b
2912:
292	testb	$0x4,	%cl   /* is it the done indicator? */
293	jz	2f
294	jmp	3f
2952:
296	testb	$0x8,	%cl   /* is it the source indicator? */
297	jz	0b	      /* Ignore it otherwise */
298	movq	%rcx,   %rsi  /* For every source page do a copy */
299	andq	$0xfffffffffffff000, %rsi
300
	/* Each rep movsq below advances %rsi and %rdi, hence the saved copies. */
301	movq	%rdi, %rdx    /* Save destination page to %rdx */
302	movq	%rsi, %rax    /* Save source page to %rax */
303
304	/* copy source page to swap page */
305	movq	%r10, %rdi
306	movl	$512, %ecx
307	rep ; movsq
308
309	/* copy destination page to source page */
310	movq	%rax, %rdi
311	movq	%rdx, %rsi
312	movl	$512, %ecx
313	rep ; movsq
314
315	/* copy swap page to destination page */
316	movq	%rdx, %rdi
317	movq	%r10, %rsi
318	movl	$512, %ecx
319	rep ; movsq
320
	/*
	 * %rsi = address just past the source page.
	 * NOTE(review): %rsi is reloaded from the next source entry before it
	 * is read again, so this value appears to be unused - confirm.
	 */
321	lea	PAGE_SIZE(%rax), %rsi
322	jmp	0b
3233:
324	ANNOTATE_UNRET_SAFE
325	ret
326	int3
327SYM_CODE_END(swap_pages)
328
/*
 * Pad with 0xcc bytes (the INT3 opcode) so that the code starting at
 * relocate_kernel occupies exactly KEXEC_CONTROL_CODE_MAX_SIZE bytes; the
 * DATA() slots are addressed from that offset onwards. This also closes
 * relocate_range, which was opened together with relocate_kernel.
 */
329	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
330SYM_CODE_END(relocate_range);
331