xref: /linux/arch/x86/platform/pvh/head.S (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1/* SPDX-License-Identifier: GPL-2.0 */
2
3/*
4 * Copyright (C) 2016, Oracle and/or its affiliates. All rights reserved.
5 */
6
7	.code32
8	.text
9#ifdef CONFIG_X86_32
10#define _pa(x)          ((x) - __START_KERNEL_map)
11#endif
12#define rva(x)          ((x) - pvh_start_xen)
13
14#include <linux/elfnote.h>
15#include <linux/init.h>
16#include <linux/linkage.h>
17#include <asm/desc_defs.h>
18#include <asm/segment.h>
19#include <asm/asm.h>
20#include <asm/boot.h>
21#include <asm/pgtable.h>
22#include <asm/processor-flags.h>
23#include <asm/msr.h>
24#include <asm/nospec-branch.h>
25#include <xen/interface/elfnote.h>
26
27	__INIT
28
29/*
30 * Entry point for PVH guests.
31 *
32 * Xen ABI specifies the following register state when we come here:
33 *
34 * - `ebx`: contains the physical memory address where the loader has placed
35 *          the boot start info structure.
36 * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
37 * - `cr4`: all bits are cleared.
38 * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
39 *          and a limit of `0xFFFFFFFF`. The selector value is unspecified.
40 * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
41 *               `0` and a limit of `0xFFFFFFFF`. The selector values are all
42 *               unspecified.
43 * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
44 *         of '0x67'.
45 * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
46 *             Bit 8 (TF) must be cleared. Other bits are all unspecified.
47 *
48 * All other processor registers and flag bits are unspecified. The OS is in
49 * charge of setting up its own stack, GDT and IDT.
50 */
51
52#define PVH_GDT_ENTRY_CS	1
53#define PVH_GDT_ENTRY_DS	2
54#define PVH_CS_SEL		(PVH_GDT_ENTRY_CS * 8)
55#define PVH_DS_SEL		(PVH_GDT_ENTRY_DS * 8)
56
57SYM_CODE_START(pvh_start_xen)
58	UNWIND_HINT_END_OF_STACK
59	cld
60
61	/*
62	 * See the comment for startup_32 for more details.  We need to
63	 * execute a call to get the execution address to be position
64	 * independent, but we don't have a stack.  Save and restore the
65	 * magic field of start_info in ebx, and use that as the stack.
66	 */
67	mov  (%ebx), %eax
68	leal 4(%ebx), %esp
69	ANNOTATE_INTRA_FUNCTION_CALL
70	call 1f
711:	popl %ebp
72	mov  %eax, (%ebx)
73	subl $rva(1b), %ebp	/* %ebp = runtime load address of pvh_start_xen */
74	movl $0, %esp	/* done borrowing start_info as a stack */
75
	/*
	 * The GDT descriptor's base field holds gdt_start's offset from the
	 * descriptor itself; add the descriptor's runtime address to turn it
	 * into an absolute base before lgdt (keeps this code PIC).
	 */
76	leal rva(gdt)(%ebp), %eax
77	addl %eax, 2(%eax)
78	lgdt (%eax)
79
	/* Load flat data segments from the freshly installed GDT. */
80	mov $PVH_DS_SEL,%eax
81	mov %eax,%ds
82	mov %eax,%es
83	mov %eax,%ss
84
85	/* Stash hvm_start_info. */
86	leal rva(pvh_start_info)(%ebp), %edi
87	mov %ebx, %esi
88	movl rva(pvh_start_info_sz)(%ebp), %ecx
89	shr $2,%ecx	/* size is in bytes; copy 32-bit words */
90	rep movsl
91
	/* Switch to the dedicated early boot stack. */
92	leal rva(early_stack_end)(%ebp), %esp
93
94#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
95	/* Enable PAE mode. */
96	mov %cr4, %eax
97	orl $X86_CR4_PAE, %eax
98	mov %eax, %cr4
99#endif
100
101#ifdef CONFIG_X86_64
102	/* Enable Long mode. */
103	mov $MSR_EFER, %ecx
104	rdmsr
105	btsl $_EFER_LME, %eax
106	wrmsr
107
108	/*
109	 * Reuse the non-relocatable symbol emitted for the ELF note to
110	 * subtract the build time physical address of pvh_start_xen() from
111	 * its actual runtime address, without relying on absolute 32-bit ELF
112	 * relocations, as these are not supported by the linker when running
113	 * in -pie mode, and should be avoided in .head.text in general.
114	 */
115	mov %ebp, %ebx
116	subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx
117	jz .Lpagetable_done	/* loaded at link-time address: no fixups needed */
118
119	/*
120	 * Store the resulting load offset in phys_base.  __pa() needs
121	 * phys_base set to calculate the hypercall page in xen_pvh_init().
122	 */
123	movl %ebx, rva(phys_base)(%ebp)
124
125	/* Fixup page-tables for relocation. */
126	leal rva(pvh_init_top_pgt)(%ebp), %edi
127	movl $PTRS_PER_PGD, %ecx
1282:
129	testl $_PAGE_PRESENT, 0x00(%edi)
130	jz 1f
131	addl %ebx, 0x00(%edi)	/* add load offset to each present PGD entry */
1321:
133	addl $8, %edi
134	decl %ecx
135	jnz 2b
136
137	/* L3 ident has a single entry. */
138	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
139	addl %ebx, 0x00(%edi)
140
	/* L3 kernel table: only its last two slots are populated. */
141	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
142	addl %ebx, (PAGE_SIZE - 16)(%edi)
143	addl %ebx, (PAGE_SIZE - 8)(%edi)
144
145	/* pvh_level2_ident_pgt is fine - large pages */
146
147	/* pvh_level2_kernel_pgt needs adjustment - large pages */
148	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
149	movl $PTRS_PER_PMD, %ecx
1502:
151	testl $_PAGE_PRESENT, 0x00(%edi)
152	jz 1f
153	addl %ebx, 0x00(%edi)
1541:
155	addl $8, %edi
156	decl %ecx
157	jnz 2b
158
159.Lpagetable_done:
160	/* Enable pre-constructed page tables. */
161	leal rva(pvh_init_top_pgt)(%ebp), %eax
162	mov %eax, %cr3
163	mov $(X86_CR0_PG | X86_CR0_PE), %eax
164	mov %eax, %cr0
165
166	/* Jump to 64-bit mode. */
167	pushl $PVH_CS_SEL
168	leal  rva(1f)(%ebp), %eax
169	pushl %eax
170	lretl
171
172	/* 64-bit entry point. */
173	.code64
1741:
175	UNWIND_HINT_END_OF_STACK
176
177	/*
178	 * Set up GSBASE.
179	 * Note that on SMP the boot CPU uses the init data section until
180	 * the per-CPU areas are set up.
181	 */
182	movl $MSR_GS_BASE,%ecx
183	xorl %eax, %eax
184	xorl %edx, %edx
185	wrmsr
186
187	/* Call xen_prepare_pvh() via the kernel virtual mapping */
188	leaq xen_prepare_pvh(%rip), %rax
189	subq phys_base(%rip), %rax
190	addq $__START_KERNEL_map, %rax
191	ANNOTATE_RETPOLINE_SAFE
192	call *%rax
193
194	/* startup_64 expects boot_params in %rsi. */
195	lea pvh_bootparams(%rip), %rsi
196	jmp startup_64
197
198#else /* CONFIG_X86_64 */
199
	/* 32-bit path: build early page tables in C, then enter startup_32. */
200	call mk_early_pgtbl_32
201
202	mov $_pa(initial_page_table), %eax
203	mov %eax, %cr3
204
205	mov %cr0, %eax
206	or $(X86_CR0_PG | X86_CR0_PE), %eax
207	mov %eax, %cr0
208
209	ljmp $PVH_CS_SEL, $1f
2101:
211	call xen_prepare_pvh
212	mov $_pa(pvh_bootparams), %esi
213
214	/* startup_32 doesn't expect paging and PAE to be on. */
215	ljmp $PVH_CS_SEL, $_pa(2f)
2162:
217	mov %cr0, %eax
218	and $~X86_CR0_PG, %eax
219	mov %eax, %cr0
220	mov %cr4, %eax
221	and $~X86_CR4_PAE, %eax
222	mov %eax, %cr4
223
224	ljmp $PVH_CS_SEL, $_pa(startup_32)
225#endif
226SYM_CODE_END(pvh_start_xen)
227
228	.section ".init.data","aw"
229	.balign 8
	/*
	 * GDT pseudo-descriptor for lgdt.  The base field initially holds
	 * gdt_start's offset from this descriptor; pvh_start_xen adds the
	 * descriptor's own runtime address to it before lgdt, so the code
	 * works at any load address without absolute relocations.
	 */
230SYM_DATA_START_LOCAL(gdt)
231	.word gdt_end - gdt_start - 1	/* limit */
232	.long gdt_start - gdt		/* base: offset, fixed up at runtime */
233	.word 0				/* padding */
234SYM_DATA_END(gdt)
	/*
	 * Minimal flat GDT: null descriptor, one code segment and one data
	 * segment, base 0, 4 GiB limit.  Entry order must match
	 * PVH_GDT_ENTRY_CS / PVH_GDT_ENTRY_DS above.
	 */
235SYM_DATA_START_LOCAL(gdt_start)
236	.quad 0x0000000000000000            /* NULL descriptor */
237#ifdef CONFIG_X86_64
238	.quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */
239#else
240	.quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */
241#endif
242	.quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */
243SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
244
245	.balign 16
	/*
	 * Zero-filled BOOT_STACK_SIZE-byte scratch stack; pvh_start_xen
	 * points %esp at early_stack_end once the GDT is set up.
	 */
246SYM_DATA_START_LOCAL(early_stack)
247	.fill BOOT_STACK_SIZE, 1, 0
248SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
249
250#ifdef CONFIG_X86_64
251/*
252 * Xen PVH needs a set of identity mapped and kernel high mapping
253 * page tables.  pvh_start_xen starts running on the identity mapped
254 * page tables, but xen_prepare_pvh calls into the high mapping.
255 * These page tables need to be relocatable and are only used until
256 * startup_64 transitions to init_top_pgt.
257 */
258SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
	/* Slot 0: identity mapping of the low 1 GiB. */
259	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
260	.org    pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
	/* Slot L4_PAGE_OFFSET: the same ident table, aliased at the direct map. */
261	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
262	.org    pvh_init_top_pgt + L4_START_KERNEL * 8, 0
263	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
264	.quad   pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
265SYM_DATA_END(pvh_init_top_pgt)
266
267SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
	/* Entry 0 only: the 1 GiB identity region via pvh_level2_ident_pgt. */
268	.quad	pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
269	.fill	511, 8, 0
270SYM_DATA_END(pvh_level3_ident_pgt)
271SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
272	/*
273	 * Since I easily can, map the first 1G.
274	 * Don't set NX because code runs from these pages.
275	 *
276	 * Note: This sets _PAGE_GLOBAL regardless of whether
277	 * the CPU supports it or it is enabled.  But,
278	 * the CPU should ignore the bit.
279	 */
280	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
281SYM_DATA_END(pvh_level2_ident_pgt)
	/*
	 * L3 table for the kernel high mapping.  Only the last two slots are
	 * populated; pvh_start_xen fixes both up when the image is relocated.
	 */
282SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
283	.fill	L3_START_KERNEL, 8, 0
284	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
285	.quad	pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
286	.quad	0 /* no fixmap */
287SYM_DATA_END(pvh_level3_kernel_pgt)
288
289SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
290	/*
291	 * Kernel high mapping.
292	 *
293	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
294	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
295	 * 512 MiB otherwise.
296	 *
297	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
298	 *
299	 * This table is eventually used by the kernel during normal runtime.
300	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
301	 * or _PAGE_GLOBAL in some cases.
302	 */
	/* 2 MiB large-page PMDs; relocated entries are fixed up in pvh_start_xen. */
303	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
304SYM_DATA_END(pvh_level2_kernel_pgt)
305
	/*
	 * Advertise to the PVH loader that this entry point is relocatable.
	 * NOTE(review): the three fields appear to be alignment, preferred
	 * load address, and highest usable address — confirm against
	 * XEN_ELFNOTE_PHYS32_RELOC in xen/interface/elfnote.h.
	 */
306	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
307		     .long CONFIG_PHYSICAL_ALIGN;
308		     .long LOAD_PHYSICAL_ADDR;
309		     .long KERNEL_IMAGE_SIZE - 1)
310#endif
311
	/*
	 * Publish the 32-bit PVH entry point.  The payload symbol,
	 * xen_elfnote_phys32_entry, is deliberately made global:
	 * pvh_start_xen reads it (via rva()) to compute the runtime load
	 * offset without needing absolute 32-bit ELF relocations.
	 */
312	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry;
313		xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .)
314