/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright (C) 2016, Oracle and/or its affiliates. All rights reserved.
 */

	.code32
	.text
#ifdef CONFIG_X86_32
#define _pa(x) ((x) - __START_KERNEL_map)
#endif
/* Offset of x from the start of this file; valid at runtime regardless of
 * where the loader actually placed us (position independent). */
#define rva(x) ((x) - pvh_start_xen)

#include <linux/elfnote.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/desc_defs.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
#include <xen/interface/elfnote.h>

	__INIT

/*
 * Entry point for PVH guests.
 *
 * Xen ABI specifies the following register state when we come here:
 *
 * - `ebx`: contains the physical memory address where the loader has placed
 *          the boot start info structure.
 * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
 * - `cr4`: all bits are cleared.
 * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
 *          and a limit of `0xFFFFFFFF`. The selector value is unspecified.
 * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
 *          `0` and a limit of `0xFFFFFFFF`. The selector values are all
 *          unspecified.
 * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
 *          of '0x67'.
 * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
 *          Bit 8 (TF) must be cleared. Other bits are all unspecified.
 *
 * All other processor registers and flag bits are unspecified. The OS is in
 * charge of setting up its own stack, GDT and IDT.
 */

/* Minimal early-boot GDT layout: entry 0 = NULL, 1 = code, 2 = data. */
#define PVH_GDT_ENTRY_CS	1
#define PVH_GDT_ENTRY_DS	2
#define PVH_CS_SEL		(PVH_GDT_ENTRY_CS * 8)
#define PVH_DS_SEL		(PVH_GDT_ENTRY_DS * 8)

SYM_CODE_START(pvh_start_xen)
	UNWIND_HINT_END_OF_STACK
	cld

	/*
	 * See the comment for startup_32 for more details.  We need to
	 * execute a call to get the execution address to be position
	 * independent, but we don't have a stack.  Save and restore the
	 * magic field of start_info in ebx, and use that as the stack.
	 */
	mov (%ebx), %eax		/* save start_info->magic ...          */
	leal 4(%ebx), %esp		/* ... and use its slot as a 1-entry stack */
	ANNOTATE_INTRA_FUNCTION_CALL
	call 1f
1:	popl %ebp			/* ebp = runtime address of label 1b */
	mov %eax, (%ebx)		/* restore start_info->magic */
	subl $rva(1b), %ebp		/* ebp = runtime load address of pvh_start_xen */
	movl $0, %esp			/* no usable stack until early_stack below */

	/*
	 * The GDT descriptor's base field was assembled as the link-time
	 * offset (gdt_start - gdt); add the descriptor's own runtime
	 * address to turn it into an absolute base before lgdt.
	 */
	leal rva(gdt)(%ebp), %eax
	addl %eax, 2(%eax)		/* patch base field at offset 2 of the descriptor */
	lgdt (%eax)

	mov $PVH_DS_SEL,%eax
	mov %eax,%ds
	mov %eax,%es
	mov %eax,%ss

	/* Stash hvm_start_info. */
	leal rva(pvh_start_info)(%ebp), %edi
	mov %ebx, %esi
	movl rva(pvh_start_info_sz)(%ebp), %ecx
	shr $2,%ecx			/* byte count -> 32-bit word count */
	rep movsl

	/* Now a real stack is available. */
	leal rva(early_stack_end)(%ebp), %esp

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
	/* Enable PAE mode. */
	mov %cr4, %eax
	orl $X86_CR4_PAE, %eax
	mov %eax, %cr4
#endif

#ifdef CONFIG_X86_64
	/* Enable Long mode. */
	mov $MSR_EFER, %ecx
	rdmsr
	btsl $_EFER_LME, %eax
	wrmsr

	/*
	 * Reuse the non-relocatable symbol emitted for the ELF note to
	 * subtract the build time physical address of pvh_start_xen() from
	 * its actual runtime address, without relying on absolute 32-bit ELF
	 * relocations, as these are not supported by the linker when running
	 * in -pie mode, and should be avoided in .head.text in general.
	 */
	mov %ebp, %ebx
	subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx
	jz .Lpagetable_done		/* loaded at build-time address: no fixups needed */

	/*
	 * Store the resulting load offset in phys_base.  __pa() needs
	 * phys_base set to calculate the hypercall page in xen_pvh_init().
	 */
	movl %ebx, rva(phys_base)(%ebp)

	/* Fixup page-tables for relocation: shift every present PGD entry. */
	leal rva(pvh_init_top_pgt)(%ebp), %edi
	movl $PTRS_PER_PGD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)		/* rebase next-level table address */
1:
	addl $8, %edi			/* 8 bytes per 64-bit entry */
	decl %ecx
	jnz 2b

	/* L3 ident has a single entry. */
	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
	addl %ebx, 0x00(%edi)

	/* L3 kernel table: only the last two entries are populated (see below). */
	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
	addl %ebx, (PAGE_SIZE - 16)(%edi)
	addl %ebx, (PAGE_SIZE - 8)(%edi)

	/* pvh_level2_ident_pgt is fine - large pages */

	/* pvh_level2_kernel_pgt needs adjustment - large pages */
	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
	movl $PTRS_PER_PMD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

.Lpagetable_done:
	/* Enable pre-constructed page tables. */
	leal rva(pvh_init_top_pgt)(%ebp), %eax
	mov %eax, %cr3
	mov $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	/* Jump to 64-bit mode: far return loads CS with the 64-bit selector. */
	pushl $PVH_CS_SEL
	leal rva(1f)(%ebp), %eax
	pushl %eax
	lretl

	/* 64-bit entry point. */
	.code64
1:
	UNWIND_HINT_END_OF_STACK

	/*
	 * Set up GSBASE.
	 * Note that on SMP the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
	movl $MSR_GS_BASE,%ecx
	xorl %eax, %eax
	xorl %edx, %edx
	wrmsr

	/* Call xen_prepare_pvh() via the kernel virtual mapping */
	leaq xen_prepare_pvh(%rip), %rax
	subq phys_base(%rip), %rax	/* physical address of xen_prepare_pvh ... */
	addq $__START_KERNEL_map, %rax	/* ... translated to the high kernel mapping */
	ANNOTATE_RETPOLINE_SAFE
	call *%rax

	/* startup_64 expects boot_params in %rsi. */
	lea pvh_bootparams(%rip), %rsi
	jmp startup_64

#else /* CONFIG_X86_64 */

	/* 32-bit path: build the early page tables in C, then enable paging. */
	call mk_early_pgtbl_32

	mov $_pa(initial_page_table), %eax
	mov %eax, %cr3

	mov %cr0, %eax
	or $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	ljmp $PVH_CS_SEL, $1f		/* serialize: reload CS after enabling paging */
1:
	call xen_prepare_pvh
	mov $_pa(pvh_bootparams), %esi

	/* startup_32 doesn't expect paging and PAE to be on. */
	ljmp $PVH_CS_SEL, $_pa(2f)	/* continue at the identity-mapped address */
2:
	mov %cr0, %eax
	and $~X86_CR0_PG, %eax		/* turn paging back off ... */
	mov %eax, %cr0
	mov %cr4, %eax
	and $~X86_CR4_PAE, %eax		/* ... and PAE too, as startup_32 requires */
	mov %eax, %cr4

	ljmp $PVH_CS_SEL, $_pa(startup_32)
#endif
SYM_CODE_END(pvh_start_xen)

	.section ".init.data","aw"
	.balign 8
	/*
	 * GDT descriptor (limit + base).  The base is assembled as a
	 * file-relative offset and patched to an absolute address at
	 * runtime (see the addl before lgdt above).
	 */
SYM_DATA_START_LOCAL(gdt)
	.word gdt_end - gdt_start - 1
	.long gdt_start - gdt
	.word 0
SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
	.quad 0x0000000000000000	/* NULL descriptor */
#ifdef CONFIG_X86_64
	.quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff)	/* PVH_CS_SEL */
#else
	.quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff)	/* PVH_CS_SEL */
#endif
	.quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff)	/* PVH_DS_SEL */
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)

	.balign 16
SYM_DATA_START_LOCAL(early_stack)
	.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)

#ifdef CONFIG_X86_64
/*
 * Xen PVH needs a set of identity mapped and kernel high mapping
 * page tables.  pvh_start_xen starts running on the identity mapped
 * page tables, but xen_prepare_pvh calls into the high mapping.
 * These page tables need to be relocatable and are only used until
 * startup_64 transitions to init_top_pgt.
 */
SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(pvh_init_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
	.quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.fill 511, 8, 0
SYM_DATA_END(pvh_level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
	/*
	 * Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 *
	 * Note: This sets _PAGE_GLOBAL despite whether
	 * the CPU supports it or it is enabled.  But,
	 * the CPU should ignore the bit.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(pvh_level2_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
	.fill L3_START_KERNEL, 8, 0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.quad 0	/* no fixmap */
SYM_DATA_END(pvh_level3_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
	/*
	 * Kernel high mapping.
	 *
	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
	 * 512 MiB otherwise.
	 *
	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
	 *
	 * This table is eventually used by the kernel during normal runtime.
	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
	 * or _PAGE_GLOBAL in some cases.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
SYM_DATA_END(pvh_level2_kernel_pgt)

	/* Advertise to Xen the alignment/offset/size constraints for relocation. */
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
		     .long CONFIG_PHYSICAL_ALIGN;
		     .long LOAD_PHYSICAL_ADDR;
		     .long KERNEL_IMAGE_SIZE - 1)
#endif

	/*
	 * Entry-point note.  The note's payload is a self-relative pointer;
	 * the code above uses the xen_elfnote_phys32_entry symbol to compute
	 * the runtime load offset without absolute relocations.
	 */
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry;
		xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .)