xref: /linux/arch/x86/kernel/relocate_kernel_64.S (revision c7546e2c3cb739a3c1a2f5acaf9bb629d401afe5)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * relocate_kernel.S - put the kernel image in place to boot
4 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
5 */
6
7#include <linux/linkage.h>
8#include <linux/stringify.h>
9#include <asm/alternative.h>
10#include <asm/page_types.h>
11#include <asm/kexec.h>
12#include <asm/processor-flags.h>
13#include <asm/pgtable_types.h>
14#include <asm/nospec-branch.h>
15#include <asm/unwind_hints.h>
16
17/*
18 * Must be relocatable PIC code callable as a C function, in particular
19 * there must be a plain RET and not jump to return thunk.
20 */
21
22#define PTR(x) (x << 3)
23#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
24
25/*
26 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
27 * ~ control_page + PAGE_SIZE are used as data storage and stack for
28 * jumping back
29 */
30#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
31
32/* Minimal CPU state */
33#define RSP			DATA(0x0)
34#define CR0			DATA(0x8)
35#define CR3			DATA(0x10)
36#define CR4			DATA(0x18)
37
38/* other data */
39#define CP_PA_TABLE_PAGE	DATA(0x20)
40#define CP_PA_SWAP_PAGE		DATA(0x28)
41#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
42
43	.text
44	.align PAGE_SIZE
45	.code64
/*
 * relocate_kernel() - C-callable entry point, still executing on the
 * calling kernel's own page tables.  Saves the minimal CPU state needed
 * for jumping back, captures the physical addresses it will need after
 * the page-table switch, then switches to the identity-mapped tables
 * and "returns" into the copy of identity_mapped inside the control
 * page.
 *
 * relocate_range covers this entire object (through the .skip pad at
 * the end of the file) so its total size can be measured.
 */
SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * Incoming arguments (SysV AMD64 C call):
	 * %rdi indirection_page
	 * %rsi page_list
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/* %r11 = virtual address of the control page */
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	/*
	 * Stash the stack pointer and the control registers in the
	 * control page's DATA() area; virtual_mapped restores them on
	 * the way back when context is preserved.
	 */
	movq	%rsp, RSP(%r11)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%cr4, %rax
	movq	%rax, CR4(%r11)

	/* Save CR4. Required to enable the right paging mode later. */
	movq	%rax, %r13

	/* zero out flags, and disable interrupts */
	pushq $0
	popfq

	/* Save SME active flag (host_mem_enc_active) in callee-role %r12 */
	movq	%r8, %r12

	/*
	 * get physical address of control page now
	 * this is impossible after page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get physical address of swap page now */
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10

	/* save some information for jumping back */
	movq	%r9, CP_PA_TABLE_PAGE(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

	/* Switch to the identity mapped page tables */
	movq	%r9, %cr3

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp

	/*
	 * Jump to the identity-mapped copy of this code: compute the
	 * physical address of identity_mapped inside the control page and
	 * "return" to it.  Must be a plain RET, not a jump to the return
	 * thunk (see the note at the top of the file); int3 is a
	 * speculation trap behind the RET.
	 */
	addq	$(identity_mapped - relocate_kernel), %r8
	pushq	%r8
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(relocate_kernel)
116
/*
 * identity_mapped() - runs from the control page on the identity-mapped
 * page tables.  Puts CR0/CR4 into a known state, copies the kernel
 * image into place via swap_pages, then either jumps to the new
 * kernel's start address with cleared registers, or (when
 * preserve_context is set) calls it and swaps everything back to return
 * to the original kernel via virtual_mapped.
 *
 * On entry (set up by relocate_kernel):
 * %rdx start address, %rcx preserve_context, %r9 PA of page tables,
 * %r10 PA of swap page, %r12 SME-active flag, %r13 original CR4,
 * %rdi indirection page, %rsp end of the physical control page.
 */
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/* set return address to 0 if not preserving context */
	pushq	$0
	/* store the start address on the stack */
	pushq   %rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?)  %r9 still holds the PA of the tables. */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r12, %r12
	jz .Lsme_off
	wbinvd
.Lsme_off:

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	/* preserve_context == 0: zero every GPR and jump to the new kernel */
	testq	%r11, %r11
	jnz .Lrelocate
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl    %ecx, %ecx
	xorl    %edx, %edx
	xorl    %esi, %esi
	xorl    %edi, %edi
	xorl    %ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	/* "Return" to the start address pushed on entry; plain RET required. */
	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	/* preserve_context set: call the new image and come back */
	popq	%rdx			/* %rdx = start address (pushed on entry) */
	leaq	PAGE_SIZE(%r10), %rsp	/* run on a stack at the end of the swap page */
	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	movq	0(%rsp), %rbp
	/* Recover the control-page data area via RIP-relative addressing */
	leaq	relocate_kernel(%rip), %r8
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	%rax, %cr3
	lea	PAGE_SIZE(%r8), %rsp
	/* Undo the copy, restoring the original kernel's pages */
	call	swap_pages
	movq	$virtual_mapped, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)
234
/*
 * virtual_mapped() - return path for preserve_context: restore the
 * stack pointer and the CR0/CR3/CR4 values saved by relocate_kernel(),
 * pop the callee-saved registers pushed there, and return to the C
 * caller with the peer's re-entry point (from %rbp) in %rax.
 *
 * %r8 holds the control-page address, set by identity_mapped just
 * before RETing here.
 */
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	RSP(%r8), %rsp
	movq	CR4(%r8), %rax
	movq	%rax, %cr4
	/* Load CR0 into %r8 before %r8's CR3 slot is no longer reachable */
	movq	CR3(%r8), %rax
	movq	CR0(%r8), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0
	/* Return value: re-entry point of the peer system */
	movq	%rbp, %rax

	/* Unwind relocate_kernel()'s prologue (pushf + six pushes) */
	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)
258
259	/* Do the copies */
	/* Do the copies */
/*
 * swap_pages() - walk the indirection page list and exchange each
 * source page with its destination page, using the swap page (%r10) as
 * the intermediate buffer so both originals survive (required for
 * jumping back with preserve_context).
 *
 * In:  %rdi indirection page, %r10 PA of swap page.
 * Each 8-byte entry is a page address with low flag bits:
 *   0x1 destination, 0x2 indirection, 0x4 done, 0x8 source.
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, flags.
 */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	1f		/* first entry is already in %rcx */

0:	/* top, read another word for the indirection page */

	movq	(%rbx), %rcx
	addq	$8,	%rbx
1:
	testb	$0x1,	%cl   /* is it a destination page? */
	jz	2f
	movq	%rcx,	%rdi  /* %rdi = destination, flag bits masked off */
	andq	$0xfffffffffffff000, %rdi
	jmp	0b
2:
	testb	$0x2,	%cl   /* is it an indirection page? */
	jz	2f
	movq	%rcx,   %rbx  /* %rbx = new entry pointer to read from */
	andq	$0xfffffffffffff000, %rbx
	jmp	0b
2:
	testb	$0x4,	%cl   /* is it the done indicator? */
	jz	2f
	jmp	3f
2:
	testb	$0x8,	%cl   /* is it the source indicator? */
	jz	0b	      /* Ignore it otherwise */
	movq	%rcx,   %rsi  /* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx    /* Save destination page to %rdx */
	movq	%rsi, %rax    /* Save source page to %rax */

	/* copy source page to swap page (512 qwords = one 4K page) */
	movq	%r10, %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	%r10, %rsi
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi	/* next source defaults to the following page */
	jmp	0b
3:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

	/* Pad the code region to KEXEC_CONTROL_CODE_MAX_SIZE with int3 (0xcc) */
	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
SYM_CODE_END(relocate_range);
323