/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright (C) 2016, Oracle and/or its affiliates. All rights reserved.
 */

	.code32
	.text
#ifdef CONFIG_X86_32
#define _pa(x) ((x) - __START_KERNEL_map)
#endif
#define rva(x) ((x) - pvh_start_xen)

#include <linux/elfnote.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/desc_defs.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
#include <xen/interface/elfnote.h>

	__HEAD

/*
 * Entry point for PVH guests.
 *
 * The Xen ABI specifies the following register state when we come here:
 *
 *  - `ebx`: contains the physical memory address where the loader has placed
 *           the boot start info structure.
 *  - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
 *  - `cr4`: all bits are cleared.
 *  - `cs `: must be a 32-bit read/execute code segment with a base of `0`
 *           and a limit of `0xFFFFFFFF`. The selector value is unspecified.
 *  - `ds`, `es`: must be a 32-bit read/write data segment with a base of
 *                `0` and a limit of `0xFFFFFFFF`. The selector values are all
 *                unspecified.
 *  - `tr`: must be a 32-bit TSS (active) with a base of `0` and a limit
 *          of `0x67`.
 *  - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
 *              Bit 8 (TF) must be cleared. Other bits are all unspecified.
 *
 * All other processor registers and flag bits are unspecified. The OS is in
 * charge of setting up its own stack, GDT and IDT.
 */

#define PVH_GDT_ENTRY_CS	1
#define PVH_GDT_ENTRY_DS	2
#define PVH_CS_SEL		(PVH_GDT_ENTRY_CS * 8)
#define PVH_DS_SEL		(PVH_GDT_ENTRY_DS * 8)

SYM_CODE_START(pvh_start_xen)
	UNWIND_HINT_END_OF_STACK
	cld

	/*
	 * See the comment for startup_32 for more details.  We need to
	 * execute a call to get the execution address to be position
	 * independent, but we don't have a stack.  Save and restore the
	 * magic field of start_info in ebx, and use that as the stack.
	 */
	mov (%ebx), %eax
	leal 4(%ebx), %esp
	ANNOTATE_INTRA_FUNCTION_CALL
	call 1f
1:	popl %ebp
	mov %eax, (%ebx)
	subl $rva(1b), %ebp
	movl $0, %esp

	/*
	 * Load the GDT: the pseudo-descriptor's base field holds a
	 * link-time relative offset, so patch it into a runtime address
	 * before handing it to lgdt.
	 */
	leal rva(gdt)(%ebp), %eax
	addl %eax, 2(%eax)
	lgdt (%eax)

	/* Reload the data segment selectors from the new GDT. */
	mov $PVH_DS_SEL,%eax
	mov %eax,%ds
	mov %eax,%es
	mov %eax,%ss

	/* Stash hvm_start_info. */
	leal rva(pvh_start_info)(%ebp), %edi
	mov %ebx, %esi
	movl rva(pvh_start_info_sz)(%ebp), %ecx
	shr $2,%ecx
	rep
	movsl

	leal rva(early_stack_end)(%ebp), %esp

	/* Enable PAE mode. */
	mov %cr4, %eax
	orl $X86_CR4_PAE, %eax
	mov %eax, %cr4

#ifdef CONFIG_X86_64
	/* Enable Long mode. */
	mov $MSR_EFER, %ecx
	rdmsr
	btsl $_EFER_LME, %eax
	wrmsr

	/*
	 * Reuse the non-relocatable symbol emitted for the ELF note to
	 * subtract the build time physical address of pvh_start_xen() from
	 * its actual runtime address, without relying on absolute 32-bit ELF
	 * relocations, as these are not supported by the linker when running
	 * in -pie mode, and should be avoided in .head.text in general.
	 */
	mov %ebp, %ebx
	subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx
	jz .Lpagetable_done

	/*
	 * Store the resulting load offset in phys_base.  __pa() needs
	 * phys_base set to calculate the hypercall page in xen_pvh_init().
	 */
	movl %ebx, rva(phys_base)(%ebp)
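
	/*
	 * Illustrative example (hypothetical addresses, not taken from a
	 * real boot): if the image was linked to place pvh_start_xen at
	 * physical 0x1000000 but the loader actually put it at 0x3000000,
	 * then %ebp holds 0x3000000 here, the value read back through
	 * rva(xen_elfnote_phys32_entry) is the link-time 0x1000000, and
	 * the load offset written to phys_base is 0x2000000.  The fixup
	 * loops below shift the pre-built page table entries by that same
	 * amount.
	 */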

	/* Fixup page-tables for relocation. */
	leal rva(pvh_init_top_pgt)(%ebp), %edi
	movl $PTRS_PER_PGD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

	/* L3 ident has a single entry. */
	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
	addl %ebx, 0x00(%edi)

	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
	addl %ebx, (PAGE_SIZE - 16)(%edi)
	addl %ebx, (PAGE_SIZE - 8)(%edi)

	/* pvh_level2_ident_pgt is fine - large pages */

	/* pvh_level2_kernel_pgt needs adjustment - large pages */
	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
	movl $PTRS_PER_PMD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

.Lpagetable_done:
	/* Enable pre-constructed page tables. */
	leal rva(pvh_init_top_pgt)(%ebp), %eax
	mov %eax, %cr3
	mov $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	/* Jump to 64-bit mode. */
	pushl $PVH_CS_SEL
	leal rva(1f)(%ebp), %eax
	pushl %eax
	lretl

	/* 64-bit entry point. */
	.code64
1:
	UNWIND_HINT_END_OF_STACK

	/*
	 * Set up GSBASE.
	 * Note that on SMP the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
	movl $MSR_GS_BASE,%ecx
	xorl %eax, %eax
	xorl %edx, %edx
	wrmsr

	/* Call xen_prepare_pvh() via the kernel virtual mapping */
	leaq xen_prepare_pvh(%rip), %rax
	subq phys_base(%rip), %rax
	addq $__START_KERNEL_map, %rax
	ANNOTATE_RETPOLINE_SAFE
	call *%rax

	/* startup_64 expects boot_params in %rsi. */
	lea pvh_bootparams(%rip), %rsi
	jmp startup_64

#else /* CONFIG_X86_64 */

	call mk_early_pgtbl_32

	mov $_pa(initial_page_table), %eax
	mov %eax, %cr3

	mov %cr0, %eax
	or $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	ljmp $PVH_CS_SEL, $1f
1:
	call xen_prepare_pvh
	mov $_pa(pvh_bootparams), %esi

	/* startup_32 doesn't expect paging and PAE to be on. */
	ljmp $PVH_CS_SEL, $_pa(2f)
2:
	mov %cr0, %eax
	and $~X86_CR0_PG, %eax
	mov %eax, %cr0
	mov %cr4, %eax
	and $~X86_CR4_PAE, %eax
	mov %eax, %cr4

	ljmp $PVH_CS_SEL, $_pa(startup_32)
#endif
SYM_CODE_END(pvh_start_xen)

	.section ".init.data","aw"
	.balign 8
SYM_DATA_START_LOCAL(gdt)
	.word gdt_end - gdt_start - 1
	.long gdt_start - gdt
	.word 0
SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
	.quad 0x0000000000000000 /* NULL descriptor */
#ifdef CONFIG_X86_64
	.quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */
#else
	.quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */
#endif
	.quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)

	.balign 16
SYM_DATA_START_LOCAL(early_stack)
	.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
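
/*
 * Rough sketch of the mappings established by the tables below for the
 * 64-bit build (a summary of the entries that follow, nothing extra):
 *
 *   VA 0                 - first 1 GiB of physical memory, identity
 *                          mapped with 2 MiB pages
 *   L4_PAGE_OFFSET slot  - the same first 1 GiB, mapped again at the
 *                          start of the direct map
 *   __START_KERNEL_map   - KERNEL_IMAGE_SIZE bytes of kernel image
 *                          high mapping
 *
 * If the image was loaded away from its link-time address, the fixup code
 * above shifts the table references and the kernel-image PMDs by the load
 * offset; the identity-mapped PMDs point at fixed physical addresses and
 * are left untouched.
 */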

#ifdef CONFIG_X86_64
/*
 * Xen PVH needs a set of identity mapped and kernel high mapping
 * page tables.  pvh_start_xen starts running on the identity mapped
 * page tables, but xen_prepare_pvh calls into the high mapping.
 * These page tables need to be relocatable and are only used until
 * startup_64 transitions to init_top_pgt.
 */
SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(pvh_init_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
	.quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.fill 511, 8, 0
SYM_DATA_END(pvh_level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
	/*
	 * Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 *
	 * Note: This sets _PAGE_GLOBAL regardless of whether the CPU
	 * supports it or whether it is enabled.  But the CPU should
	 * ignore the bit.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(pvh_level2_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
	.fill L3_START_KERNEL, 8, 0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.quad 0 /* no fixmap */
SYM_DATA_END(pvh_level3_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
	/*
	 * Kernel high mapping.
	 *
	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
	 * 512 MiB otherwise.
	 *
	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
	 *
	 * This table is eventually used by the kernel during normal runtime.
	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
	 * or _PAGE_GLOBAL in some cases.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
SYM_DATA_END(pvh_level2_kernel_pgt)

	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
		.long CONFIG_PHYSICAL_ALIGN;
		.long LOAD_PHYSICAL_ADDR;
		.long KERNEL_IMAGE_SIZE - 1)
#endif

	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry;
		xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .)
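
/*
 * A note on the PHYS32_ENTRY payload above: it is emitted as a PC-relative
 * value (xen_elfnote_phys32_entry_value - .) so that no absolute 32-bit
 * relocation is needed in .head.text.  xen_elfnote_phys32_entry_value is
 * expected to be provided at link time (by the kernel linker script) so
 * that the resulting payload equals the build-time physical address of
 * pvh_start_xen; the 64-bit entry path above reads it back through
 * rva(xen_elfnote_phys32_entry) to compute the load offset.
 */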