/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
 */

	.code32
	.text
#ifdef CONFIG_X86_32
/* Subtract __START_KERNEL_map from a link-time address (32-bit build only). */
#define _pa(x)          ((x) - __START_KERNEL_map)
#endif
/*
 * Offset of a symbol relative to pvh_start_xen.  Combined with %ebp, which
 * holds the runtime address of pvh_start_xen once the entry code has computed
 * it, this gives position-independent access: rva(sym)(%ebp).
 */
#define rva(x)          ((x) - pvh_start_xen)

#include <linux/elfnote.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/desc_defs.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
#include <xen/interface/elfnote.h>

	__INIT

/*
 * Entry point for PVH guests.
 *
 * Xen ABI specifies the following register state when we come here:
 *
 * - `ebx`: contains the physical memory address where the loader has placed
 *          the boot start info structure.
 * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
 * - `cr4`: all bits are cleared.
 * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
 *          and a limit of `0xFFFFFFFF`. The selector value is unspecified.
 * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
 *               `0` and a limit of `0xFFFFFFFF`. The selector values are all
 *               unspecified.
 * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
 *         of '0x67'.
 * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
 *             Bit 8 (TF) must be cleared. Other bits are all unspecified.
 *
 * All other processor registers and flag bits are unspecified. The OS is in
 * charge of setting up its own stack, GDT and IDT.
 *
 * Register roles inside pvh_start_xen:
 *   %ebx - start info pointer on entry; on 64-bit builds it is reused, after
 *          the structure has been copied, as the load offset (runtime minus
 *          build-time physical address).
 *   %ebp - runtime address of pvh_start_xen, base for every rva() access.
 *   %esp - briefly points into start_info, then at early_stack_end.
 */

/* Selectors for the boot GDT defined at gdt_start below (8 bytes per entry). */
#define PVH_GDT_ENTRY_CS	1
#define PVH_GDT_ENTRY_DS	2
#define PVH_CS_SEL		(PVH_GDT_ENTRY_CS * 8)
#define PVH_DS_SEL		(PVH_GDT_ENTRY_DS * 8)

SYM_CODE_START(pvh_start_xen)
	UNWIND_HINT_END_OF_STACK
	cld

	/*
	 * See the comment for startup_32 for more details.  We need to
	 * execute a call to get the execution address to be position
	 * independent, but we don't have a stack.  Save and restore the
	 * magic field of start_info in ebx, and use that as the stack.
	 */
	mov (%ebx), %eax		/* save the dword the call will overwrite */
	leal 4(%ebx), %esp		/* the push lands exactly on (%ebx) */
	ANNOTATE_INTRA_FUNCTION_CALL
	call 1f
1:	popl %ebp			/* %ebp = runtime address of label 1 */
	mov %eax, (%ebx)		/* restore the saved dword */
	subl $rva(1b), %ebp		/* %ebp = runtime address of pvh_start_xen */
	movl $0, %esp			/* stop using start_info as a stack */

	/*
	 * The base field of the gdt descriptor holds gdt_start - gdt.  Adding
	 * the runtime address of gdt turns it into the runtime address of
	 * gdt_start before the descriptor is loaded.
	 */
	leal rva(gdt)(%ebp), %eax
	addl %eax, 2(%eax)
	lgdt (%eax)

	mov $PVH_DS_SEL,%eax
	mov %eax,%ds
	mov %eax,%es
	mov %eax,%ss

	/*
	 * Stash hvm_start_info.
	 * Copies pvh_start_info_sz / 4 dwords from (%ebx) to pvh_start_info;
	 * pvh_start_info_sz is presumably a byte count - it is defined outside
	 * this file.
	 */
	leal rva(pvh_start_info)(%ebp), %edi
	mov %ebx, %esi
	movl rva(pvh_start_info_sz)(%ebp), %ecx
	shr $2,%ecx
	rep movsl

	/* Switch to the real boot stack. */
	leal rva(early_stack_end)(%ebp), %esp

	/* Enable PAE mode. */
	mov %cr4, %eax
	orl $X86_CR4_PAE, %eax
	mov %eax, %cr4

#ifdef CONFIG_X86_64
	/* Enable Long mode. */
	mov $MSR_EFER, %ecx
	rdmsr
	btsl $_EFER_LME, %eax
	wrmsr

	/*
	 * Reuse the non-relocatable symbol emitted for the ELF note to
	 * subtract the build time physical address of pvh_start_xen() from
	 * its actual runtime address, without relying on absolute 32-bit ELF
	 * relocations, as these are not supported by the linker when running
	 * in -pie mode, and should be avoided in .head.text in general.
	 */
	mov %ebp, %ebx
	subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx
	jz .Lpagetable_done		/* zero offset: nothing to fix up */

	/*
	 * Store the resulting load offset in phys_base.  __pa() needs
	 * phys_base set to calculate the hypercall page in xen_pvh_init().
	 */
	movl %ebx, rva(phys_base)(%ebp)

	/*
	 * Fixup page-tables for relocation.
	 * Entries are 8 bytes wide; the load offset is added to the low
	 * dword of every present entry.
	 */
	leal rva(pvh_init_top_pgt)(%ebp), %edi
	movl $PTRS_PER_PGD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

	/* L3 ident has a single entry. */
	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
	addl %ebx, 0x00(%edi)

	/* The last two slots of pvh_level3_kernel_pgt. */
	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
	addl %ebx, (PAGE_SIZE - 16)(%edi)
	addl %ebx, (PAGE_SIZE - 8)(%edi)

	/* pvh_level2_ident_pgt is fine - large pages */

	/*
	 * pvh_level2_kernel_pgt needs adjustment - large pages.
	 *
	 * Only the KERNEL_IMAGE_SIZE / PMD_SIZE entries emitted by PMDS()
	 * for this table exist.  Iterating over a full PTRS_PER_PMD would
	 * test, and possibly modify, whatever data follows the table when
	 * KERNEL_IMAGE_SIZE is smaller than 1 GiB.
	 */
	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
	movl $(KERNEL_IMAGE_SIZE / PMD_SIZE), %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

.Lpagetable_done:
	/*
	 * Enable pre-constructed page tables.
	 * cr0 is written as a whole (PG | PE only), not read-modify-write.
	 */
	leal rva(pvh_init_top_pgt)(%ebp), %eax
	mov %eax, %cr3
	mov $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	/*
	 * Jump to 64-bit mode.
	 * Build a far-return frame (selector, then offset) so that lretl
	 * loads PVH_CS_SEL and continues at the runtime address of 1f.
	 */
	pushl $PVH_CS_SEL
	leal rva(1f)(%ebp), %eax
	pushl %eax
	lretl

	/* 64-bit entry point. */
	.code64
1:
	UNWIND_HINT_END_OF_STACK

	/*
	 * Set up GSBASE.
	 * Note that on SMP the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
	movl $MSR_GS_BASE,%ecx
	xorl %eax, %eax			/* %edx:%eax = 0 */
	xorl %edx, %edx
	wrmsr

	/*
	 * Call xen_prepare_pvh() via the kernel virtual mapping:
	 * runtime address - phys_base + __START_KERNEL_map.
	 */
	leaq xen_prepare_pvh(%rip), %rax
	subq phys_base(%rip), %rax
	addq $__START_KERNEL_map, %rax
	ANNOTATE_RETPOLINE_SAFE
	call *%rax

	/* startup_64 expects boot_params in %rsi. */
	lea pvh_bootparams(%rip), %rsi
	jmp startup_64

#else /* CONFIG_X86_64 */

	call mk_early_pgtbl_32

	mov $_pa(initial_page_table), %eax
	mov %eax, %cr3

	mov %cr0, %eax
	or $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	/* Paging is on: continue at the link-time address of the next label. */
	ljmp $PVH_CS_SEL, $1f
1:
	call xen_prepare_pvh
	mov $_pa(pvh_bootparams), %esi

	/* startup_32 doesn't expect paging and PAE to be on. */
	ljmp $PVH_CS_SEL, $_pa(2f)
2:
	mov %cr0, %eax
	and $~X86_CR0_PG, %eax
	mov %eax, %cr0
	mov %cr4, %eax
	and $~X86_CR4_PAE, %eax
	mov %eax, %cr4

	ljmp $PVH_CS_SEL, $_pa(startup_32)
#endif
SYM_CODE_END(pvh_start_xen)

	.section ".init.data","aw"
	.balign 8
/*
 * GDT descriptor operand for lgdt: 16-bit limit followed by the base.
 * The base is stored relative to gdt and is made absolute at runtime by
 * pvh_start_xen before it is loaded.
 */
SYM_DATA_START_LOCAL(gdt)
	.word gdt_end - gdt_start - 1
	.long gdt_start - gdt
	.word 0
SYM_DATA_END(gdt)
/* Boot GDT: NULL, code (PVH_CS_SEL) and data (PVH_DS_SEL) descriptors. */
SYM_DATA_START_LOCAL(gdt_start)
	.quad 0x0000000000000000            /* NULL descriptor */
#ifdef CONFIG_X86_64
	.quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */
#else
	.quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */
#endif
	.quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)

	.balign 16
/* Boot stack; pvh_start_xen loads %esp with early_stack_end. */
SYM_DATA_START_LOCAL(early_stack)
	.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)

#ifdef CONFIG_X86_64
/*
 * Xen PVH needs a set of identity mapped and kernel high mapping
 * page tables.  pvh_start_xen starts running on the identity mapped
 * page tables, but xen_prepare_pvh calls into the high mapping.
 * These page tables need to be relocatable and are only used until
 * startup_64 transitions to init_top_pgt.
 */
SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org    pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org    pvh_init_top_pgt + L4_START_KERNEL * 8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad   pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(pvh_init_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
	.quad	pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.fill	511, 8, 0
SYM_DATA_END(pvh_level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
	/*
	 * Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 *
	 * Note: This sets _PAGE_GLOBAL despite whether
	 * the CPU supports it or it is enabled.  But,
	 * the CPU should ignore the bit.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(pvh_level2_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
	.fill	L3_START_KERNEL, 8, 0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad	pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.quad	0 /* no fixmap */
SYM_DATA_END(pvh_level3_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
	/*
	 * Kernel high mapping.
	 *
	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
	 * 512 MiB otherwise.
	 *
	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
	 *
	 * This table is eventually used by the kernel during normal runtime.
	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
	 * or _PAGE_GLOBAL in some cases.
	 *
	 * The relocation fixup in pvh_start_xen walks exactly the
	 * KERNEL_IMAGE_SIZE / PMD_SIZE entries emitted here; keep the two
	 * counts in sync.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
SYM_DATA_END(pvh_level2_kernel_pgt)

	/*
	 * NOTE(review): the three values are presumably the alignment, minimum
	 * and maximum load address that Xen may relocate the kernel to - confirm
	 * against the XEN_ELFNOTE_PHYS32_RELOC definition.
	 */
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
		     .long CONFIG_PHYSICAL_ALIGN;
		     .long LOAD_PHYSICAL_ADDR;
		     .long KERNEL_IMAGE_SIZE - 1)
#endif

	/*
	 * The note payload is emitted as a difference from the current location;
	 * xen_elfnote_phys32_entry_value is defined outside this file.
	 * pvh_start_xen reads the note at runtime to derive the load offset.
	 */
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry;
		xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .)