xref: /linux/arch/x86/kernel/relocate_kernel_64.S (revision d53b8e36925256097a08d7cb749198d85cbf9b2b)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * relocate_kernel.S - put the kernel image in place to boot
4 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
5 */
6
7#include <linux/linkage.h>
8#include <linux/stringify.h>
9#include <asm/alternative.h>
10#include <asm/page_types.h>
11#include <asm/kexec.h>
12#include <asm/processor-flags.h>
13#include <asm/pgtable_types.h>
14#include <asm/nospec-branch.h>
15#include <asm/unwind_hints.h>
16
17/*
18 * Must be relocatable PIC code callable as a C function, in particular
19 * there must be a plain RET and not jump to return thunk.
20 */
21
/*
 * PTR(x): byte offset of entry number x in an array of 8-byte entries
 * (index scaled by 8). Used below to index page_list through %rsi.
 * The argument is parenthesized so that an expression argument
 * (e.g. PTR(a + 1)) is scaled as a whole.
 */
22#define PTR(x) ((x) << 3)
/* OR of the present, read/write, accessed and dirty page flag bits. */
23#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
24
25/*
26 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
27 * ~ control_page + PAGE_SIZE are used as data storage and stack for
28 * jumping back
29 */
/* DATA(offset): offset of a data slot, counted from the start of the control page. */
30#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
31
32/* Minimal CPU state */
/* Written by relocate_kernel on entry, read back by virtual_mapped. */
33#define RSP			DATA(0x0)
34#define CR0			DATA(0x8)
35#define CR3			DATA(0x10)
36#define CR4			DATA(0x18)
37
38/* other data */
/* Written by relocate_kernel, reloaded in identity_mapped after the new image returns. */
39#define CP_PA_TABLE_PAGE	DATA(0x20)
40#define CP_PA_SWAP_PAGE		DATA(0x28)
41#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
42
43	.text
44	.align PAGE_SIZE
45	.code64
/*
 * relocate_kernel - entry point; arguments are listed in the comment below.
 *
 * Pushes the callee-saved registers and the flags on the current stack,
 * records %rsp/%cr0/%cr3/%cr4 and three addresses in the data area of the
 * control page, loads %cr3 with the page-table page taken from page_list,
 * moves the stack to the end of the control page and RETs to the
 * control-page address plus the offset of identity_mapped.
 *
 * Registers handed over to identity_mapped:
 *   %rdi indirection_page, %rdx start address, %rcx preserve_context
 *   %r9  page_list[PA_TABLE_PAGE], %r10 page_list[PA_SWAP_PAGE]
 *   %r12 host_mem_enc_active, %r13 CR4 value read on entry
 */
46SYM_CODE_START_NOALIGN(relocate_range)
47SYM_CODE_START_NOALIGN(relocate_kernel)
48	UNWIND_HINT_END_OF_STACK
49	ANNOTATE_NOENDBR
50	/*
51	 * %rdi indirection_page
52	 * %rsi page_list
53	 * %rdx start address
54	 * %rcx preserve_context
55	 * %r8  host_mem_enc_active
56	 */
57
58	/* Save the CPU context, used for jumping back */
	/* These are popped again, in reverse order, at the end of virtual_mapped. */
59	pushq %rbx
60	pushq %rbp
61	pushq %r12
62	pushq %r13
63	pushq %r14
64	pushq %r15
65	pushf
66
	/* %r11 = page_list[VA_CONTROL_PAGE]; the data slots are addressed through it. */
67	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
68	movq	%rsp, RSP(%r11)
69	movq	%cr0, %rax
70	movq	%rax, CR0(%r11)
71	movq	%cr3, %rax
72	movq	%rax, CR3(%r11)
73	movq	%cr4, %rax
74	movq	%rax, CR4(%r11)
75
76	/* Save CR4. Required to enable the right paging mode later. */
77	movq	%rax, %r13
78
79	/* zero out flags, and disable interrupts */
80	pushq $0
81	popfq
82
83	/* Save SME active flag */
	/* %r8 is reused for the control page address right below. */
84	movq	%r8, %r12
85
86	/*
87	 * get physical address of control page now
88	 * this is impossible after page table switch
89	 */
90	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
91
92	/* get physical address of page table now too */
93	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
94
95	/* get physical address of swap page now */
96	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
97
98	/* save some information for jumping back */
99	movq	%r9, CP_PA_TABLE_PAGE(%r11)
100	movq	%r10, CP_PA_SWAP_PAGE(%r11)
101	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
102
103	/* Switch to the identity mapped page tables */
104	movq	%r9, %cr3
105
106	/* setup a new stack at the end of the physical control page */
107	lea	PAGE_SIZE(%r8), %rsp
108
109	/* jump to identity mapped page */
	/*
	 * The target is computed as control page address + offset of
	 * identity_mapped from relocate_kernel, then pushed and reached with a
	 * plain RET (used here as an indirect jump).
	 */
110	addq	$(identity_mapped - relocate_kernel), %r8
111	pushq	%r8
112	ANNOTATE_UNRET_SAFE
113	ret
114	int3
115SYM_CODE_END(relocate_kernel)
116
/*
 * identity_mapped - reached by RET from relocate_kernel.
 *
 * Register state on entry, as set up by relocate_kernel:
 *   %rdi indirection_page, %rdx start address, %rcx preserve_context
 *   %r9  page-table page, %r10 swap page
 *   %r12 host_mem_enc_active, %r13 CR4 value read on entry
 *   %rsp end of the control page
 *
 * Puts CR0/CR4 into a known state, runs swap_pages, then either
 *  - preserve_context == 0: zeroes the general-purpose registers and RETs
 *    to the start address, or
 *  - preserve_context != 0: calls the start address on a stack in the swap
 *    page and, when that call returns, runs swap_pages again and continues
 *    in virtual_mapped.
 */
117SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
118	UNWIND_HINT_END_OF_STACK
119	/* set return address to 0 if not preserving context */
120	pushq	$0
121	/* store the start address on the stack */
122	pushq   %rdx
123
124	/*
125	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
126	 * below.
127	 */
128	movq	%cr4, %rax
129	andq	$~(X86_CR4_CET), %rax
130	movq	%rax, %cr4
131
132	/*
133	 * Set cr0 to a known state:
134	 *  - Paging enabled
135	 *  - Alignment check disabled
136	 *  - Write protect disabled
137	 *  - No task switch
138	 *  - Don't do FP software emulation.
139	 *  - Protected mode enabled
140	 */
141	movq	%cr0, %rax
142	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
143	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
144	movq	%rax, %cr0
145
146	/*
147	 * Set cr4 to a known state:
148	 *  - physical address extension enabled
149	 *  - 5-level paging, if it was enabled before
150	 *  - Machine check exception on TDX guest, if it was enabled before.
151	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
152	 *
153	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
154	 * PAE is always set in the original CR4.
155	 */
156	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
157	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
158	movq	%r13, %cr4
159
160	/* Flush the TLB (needed?) */
161	movq	%r9, %cr3
162
163	/*
164	 * If SME is active, there could be old encrypted cache line
165	 * entries that will conflict with the now unencrypted memory
166	 * used by kexec. Flush the caches before copying the kernel.
167	 */
168	testq	%r12, %r12
169	jz .Lsme_off
170	wbinvd
171.Lsme_off:
172
	/* Keep preserve_context in %r11: swap_pages overwrites %rcx. */
173	movq	%rcx, %r11
174	call	swap_pages
175
176	/*
177	 * To be certain of avoiding problems with self-modifying code
178	 * I need to execute a serializing instruction here.
179	 * So I flush the TLB by reloading %cr3 here, it's handy,
180	 * and not processor dependent.
181	 */
182	movq	%cr3, %rax
183	movq	%rax, %cr3
184
185	/*
186	 * set all of the registers to known values
187	 * leave %rsp alone
188	 */
189
	/* %r11 != 0 means preserve_context was requested: take the .Lrelocate path. */
190	testq	%r11, %r11
191	jnz .Lrelocate
192	xorl	%eax, %eax
193	xorl	%ebx, %ebx
194	xorl    %ecx, %ecx
195	xorl    %edx, %edx
196	xorl    %esi, %esi
197	xorl    %edi, %edi
198	xorl    %ebp, %ebp
199	xorl	%r8d, %r8d
200	xorl	%r9d, %r9d
201	xorl	%r10d, %r10d
202	xorl	%r11d, %r11d
203	xorl	%r12d, %r12d
204	xorl	%r13d, %r13d
205	xorl	%r14d, %r14d
206	xorl	%r15d, %r15d
207
	/*
	 * RET pops the start address pushed at the top of this block and jumps
	 * to it; the 0 pushed before it is left on top of the stack.
	 */
208	ANNOTATE_UNRET_SAFE
209	ret
210	int3
211
212.Lrelocate:
	/*
	 * preserve_context path: take the start address off the stack, move
	 * the stack to the end of the swap page and call the start address.
	 */
213	popq	%rdx
214	leaq	PAGE_SIZE(%r10), %rsp
215	ANNOTATE_RETPOLINE_SAFE
216	call	*%rdx
217
218	/* get the re-entry point of the peer system */
219	movq	0(%rsp), %rbp
	/*
	 * %r8 = address of relocate_kernel relative to the running code; the
	 * data slots saved by relocate_kernel are read back through it.
	 */
220	leaq	relocate_kernel(%rip), %r8
221	movq	CP_PA_SWAP_PAGE(%r8), %r10
222	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
223	movq	CP_PA_TABLE_PAGE(%r8), %rax
224	movq	%rax, %cr3
	/* Stack back at %r8 + PAGE_SIZE, then run swap_pages again with the saved entry. */
225	lea	PAGE_SIZE(%r8), %rsp
226	call	swap_pages
	/* Continue at the absolute address of virtual_mapped via push + RET. */
227	movq	$virtual_mapped, %rax
228	pushq	%rax
229	ANNOTATE_UNRET_SAFE
230	ret
231	int3
232SYM_CODE_END(identity_mapped)
233
/*
 * virtual_mapped - reached by RET from the preserve_context path of
 * identity_mapped.
 *
 * On entry %r8 is the base used to address the saved-state slots and %rbp
 * holds the re-entry point read from the peer's stack. Restores %rsp, %cr4,
 * %cr3 and %cr0 from those slots, returns the re-entry point in %rax, pops
 * the flags and registers pushed by relocate_kernel and returns to
 * relocate_kernel's caller.
 */
234SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
235	UNWIND_HINT_END_OF_STACK
236	ANNOTATE_NOENDBR // RET target, above
237	movq	RSP(%r8), %rsp
238	movq	CR4(%r8), %rax
239	movq	%rax, %cr4
	/*
	 * Both remaining values are loaded before %cr3 is written.
	 * NOTE(review): presumably because the %r8-based address is not
	 * guaranteed to be mapped by the restored page tables - confirm.
	 */
240	movq	CR3(%r8), %rax
241	movq	CR0(%r8), %r8
242	movq	%rax, %cr3
243	movq	%r8, %cr0
	/* Return value: the re-entry point saved in %rbp. */
244	movq	%rbp, %rax
245
	/* Reverse order of the pushes at the start of relocate_kernel. */
246	popf
247	popq	%r15
248	popq	%r14
249	popq	%r13
250	popq	%r12
251	popq	%rbp
252	popq	%rbx
253	ANNOTATE_UNRET_SAFE
254	ret
255	int3
256SYM_CODE_END(virtual_mapped)
257
258	/* Do the copies */
/*
 * swap_pages - walk the list of entries and exchange page contents.
 *
 * In:   %rdi first entry (the callers pass the indirection page value)
 *       %r10 address of the swap page used as scratch space
 * Each entry is an address with flag bits in its low bits:
 *       0x1 destination page, 0x2 indirection page (where to read the
 *       next entries from), 0x4 done, 0x8 source page.
 *       The address is the entry with its low 12 bits cleared.
 * For every source entry the source page and the current destination page
 * exchange contents through the swap page (three copies of 512 quadwords
 * each).
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, flags. %r10 is not modified.
 */
259SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
260	UNWIND_HINT_END_OF_STACK
261	movq	%rdi, %rcx	/* %rcx = first entry, passed in %rdi */
262	xorl	%edi, %edi
263	xorl	%esi, %esi
264	jmp	1f
265
2660:	/* top, read another word for the indirection page */
267
	/* %rbx points at the next entry to process. */
268	movq	(%rbx), %rcx
269	addq	$8,	%rbx
2701:
271	testb	$0x1,	%cl   /* is it a destination page? */
272	jz	2f
273	movq	%rcx,	%rdi
274	andq	$0xfffffffffffff000, %rdi
275	jmp	0b
2762:
277	testb	$0x2,	%cl   /* is it an indirection page? */
278	jz	2f
279	movq	%rcx,   %rbx
280	andq	$0xfffffffffffff000, %rbx
281	jmp	0b
2822:
283	testb	$0x4,	%cl   /* is it the done indicator? */
284	jz	2f
285	jmp	3f
2862:
287	testb	$0x8,	%cl   /* is it the source indicator? */
288	jz	0b	      /* Ignore it otherwise */
289	movq	%rcx,   %rsi  /* For every source page do a copy */
290	andq	$0xfffffffffffff000, %rsi
291
292	movq	%rdi, %rdx    /* Save destination page to %rdx */
293	movq	%rsi, %rax    /* Save source page to %rax */
294
	/* copy source page to swap page */
295	movq	%r10, %rdi
296	movl	$512, %ecx
297	rep ; movsq
298
	/* copy destination page to source page */
299	movq	%rax, %rdi
300	movq	%rdx, %rsi
301	movl	$512, %ecx
302	rep ; movsq
303
	/* copy swap page to destination page */
304	movq	%rdx, %rdi
305	movq	%r10, %rsi
306	movl	$512, %ecx
307	rep ; movsq
308
	/*
	 * %rdi is left as advanced by the last copy and is used as the
	 * destination for the next source entry unless a new destination
	 * entry is read first.
	 */
309	lea	PAGE_SIZE(%rax), %rsi
310	jmp	0b
3113:
312	ANNOTATE_UNRET_SAFE
313	ret
314	int3
315SYM_CODE_END(swap_pages)
316
	/*
	 * Pad with 0xcc bytes (the int3 opcode) so that the code starting at
	 * relocate_kernel occupies exactly KEXEC_CONTROL_CODE_MAX_SIZE bytes;
	 * the DATA() slots are addressed from that offset onwards.
	 */
317	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
318SYM_CODE_END(relocate_range);
319