/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Entry code for Xen PVH guests (AT&T/GAS syntax, Linux kernel style).
 *
 * The guest is started in 32-bit protected mode without paging (see the
 * ABI comment above pvh_start_xen below).  This file:
 *   - builds a position-independent view of itself (it may be loaded at
 *     an address other than the one it was linked for),
 *   - copies the Xen-provided start_info structure aside,
 *   - on 64-bit: relocates and enables a pre-built set of page tables,
 *     switches to long mode and jumps to startup_64,
 *   - on 32-bit: enables paging and jumps to startup_32.
 */

	.code32
	.text
/* Virtual-to-physical conversion for link-time kernel-high addresses. */
#define _pa(x) ((x) - __START_KERNEL_map)
/* Offset of a symbol relative to this entry point (position independent). */
#define rva(x) ((x) - pvh_start_xen)

#include <linux/elfnote.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/desc_defs.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
#include <xen/interface/elfnote.h>

	__HEAD

/*
 * Entry point for PVH guests.
 *
 * Xen ABI specifies the following register state when we come here:
 *
 * - `ebx`: contains the physical memory address where the loader has placed
 *          the boot start info structure.
 * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
 * - `cr4`: all bits are cleared.
 * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
 *          and a limit of `0xFFFFFFFF`. The selector value is unspecified.
 * - `ds`, `es`: must be a 32-bit read/write data segment with a base of
 *          `0` and a limit of `0xFFFFFFFF`. The selector values are all
 *          unspecified.
 * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
 *          of '0x67'.
 * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared.
 *          Bit 8 (TF) must be cleared. Other bits are all unspecified.
 *
 * All other processor registers and flag bits are unspecified. The OS is in
 * charge of setting up its own stack, GDT and IDT.
 */

/* Selectors into the private boot GDT defined at the bottom of this file. */
#define PVH_GDT_ENTRY_CS	1
#define PVH_GDT_ENTRY_DS	2
#define PVH_CS_SEL		(PVH_GDT_ENTRY_CS * 8)
#define PVH_DS_SEL		(PVH_GDT_ENTRY_DS * 8)

SYM_CODE_START_LOCAL(pvh_start_xen)
	UNWIND_HINT_END_OF_STACK
	cld

	/*
	 * See the comment for startup_32 for more details.  We need to
	 * execute a call to get the execution address to be position
	 * independent, but we don't have a stack.
	 *
	 * Trick: point %esp just past the first dword of start_info (its
	 * magic field, at (%ebx)), so the return address pushed by "call"
	 * lands on top of that field.  Save the field in %eax first and
	 * restore it afterwards, then compute our runtime base address in
	 * %ebp by subtracting the link-time offset of label 1.
	 */
	mov (%ebx), %eax		/* save start_info magic dword */
	leal 4(%ebx), %esp		/* one-slot stack over the magic field */
	ANNOTATE_INTRA_FUNCTION_CALL
	call 1f
1:	popl %ebp			/* %ebp = runtime address of label 1 */
	mov %eax, (%ebx)		/* restore the clobbered magic dword */
	subl $rva(1b), %ebp		/* %ebp = runtime base of pvh_start_xen */
	movl $0, %esp			/* drop the makeshift stack */

	/*
	 * Patch the runtime address of gdt_start into the GDT descriptor's
	 * base field (offset 2: the descriptor is ".word limit, .long base",
	 * see gdt below), then load it.
	 */
	leal rva(gdt)(%ebp), %eax
	leal rva(gdt_start)(%ebp), %ecx
	movl %ecx, 2(%eax)
	lgdt (%eax)

	/* Load flat data segments from our own GDT. */
	mov $PVH_DS_SEL,%eax
	mov %eax,%ds
	mov %eax,%es
	mov %eax,%ss

	/*
	 * Stash hvm_start_info.  Copy pvh_start_info_sz bytes from the
	 * Xen-provided structure at %ebx into our own pvh_start_info.
	 * The dword copy drops any remainder; assumes the size is a
	 * multiple of 4 — TODO confirm against pvh_start_info definition.
	 */
	leal rva(pvh_start_info)(%ebp), %edi
	mov %ebx, %esi
	movl rva(pvh_start_info_sz)(%ebp), %ecx
	shr $2,%ecx
	rep
	movsl

	/* Switch to the real early boot stack. */
	leal rva(early_stack_end)(%ebp), %esp

	/* Enable PAE mode (prerequisite for long mode / PAE paging). */
	mov %cr4, %eax
	orl $X86_CR4_PAE, %eax
	mov %eax, %cr4

#ifdef CONFIG_X86_64
	/* Enable Long mode (LME in EFER; activated when paging turns on). */
	mov $MSR_EFER, %ecx
	rdmsr
	btsl $_EFER_LME, %eax
	wrmsr

	/*
	 * %ebx = load offset: runtime physical base minus the physical
	 * address this code was linked for.  Zero means we were loaded at
	 * the expected address and the pre-built page tables are correct
	 * as-is.
	 */
	mov %ebp, %ebx
	subl $_pa(pvh_start_xen), %ebx /* offset */
	jz .Lpagetable_done

	/*
	 * Fixup page-tables for relocation: add the load offset to the
	 * physical address in every present top-level (PGD) entry.
	 */
	leal rva(pvh_init_top_pgt)(%ebp), %edi
	movl $PTRS_PER_PGD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

	/* L3 ident has a single entry (entry 0, see pvh_level3_ident_pgt). */
	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
	addl %ebx, 0x00(%edi)

	/*
	 * Kernel-high L3 has its two populated entries at the end of the
	 * page (see pvh_level3_kernel_pgt below).
	 */
	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
	addl %ebx, (PAGE_SIZE - 16)(%edi)
	addl %ebx, (PAGE_SIZE - 8)(%edi)

	/* pvh_level2_ident_pgt is fine - large pages */

	/* pvh_level2_kernel_pgt needs adjustment - large pages */
	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
	movl $PTRS_PER_PMD, %ecx
2:
	testl $_PAGE_PRESENT, 0x00(%edi)
	jz 1f
	addl %ebx, 0x00(%edi)
1:
	addl $8, %edi
	decl %ecx
	jnz 2b

.Lpagetable_done:
	/*
	 * Enable pre-constructed page tables.  Setting PG with LME already
	 * set activates long mode (compatibility mode until the far return
	 * below loads the 64-bit code segment).
	 */
	leal rva(pvh_init_top_pgt)(%ebp), %eax
	mov %eax, %cr3
	mov $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	/* Jump to 64-bit mode: far return to label 1 with the 64-bit CS. */
	pushl $PVH_CS_SEL
	leal rva(1f)(%ebp), %eax
	pushl %eax
	lretl

	/* 64-bit entry point. */
	.code64
1:
	UNWIND_HINT_END_OF_STACK

	/*
	 * Set base address in stack canary descriptor: write the address
	 * of 'canary' into MSR_GS_BASE (%edx:%eax = 0:canary).
	 */
	mov $MSR_GS_BASE,%ecx
	leal canary(%rip), %eax
	xor %edx, %edx
	wrmsr

	/*
	 * Calculate load offset and store in phys_base.  __pa() needs
	 * phys_base set to calculate the hypercall page in xen_pvh_init().
	 */
	movq %rbp, %rbx
	subq $_pa(pvh_start_xen), %rbx
	movq %rbx, phys_base(%rip)
	call xen_prepare_pvh
	/*
	 * Clear phys_base.  __startup_64 will *add* to its value,
	 * so reset to 0.
	 */
	xor %rbx, %rbx
	movq %rbx, phys_base(%rip)

	/* startup_64 expects boot_params in %rsi. */
	lea pvh_bootparams(%rip), %rsi
	jmp startup_64

#else /* CONFIG_X86_64 */

	/* 32-bit path: build the early page tables, then enable paging. */
	call mk_early_pgtbl_32

	mov $_pa(initial_page_table), %eax
	mov %eax, %cr3

	mov %cr0, %eax
	or $(X86_CR0_PG | X86_CR0_PE), %eax
	mov %eax, %cr0

	/* Reload CS from our GDT via a far jump. */
	ljmp $PVH_CS_SEL, $1f
1:
	call xen_prepare_pvh
	mov $_pa(pvh_bootparams), %esi

	/* startup_32 doesn't expect paging and PAE to be on. */
	ljmp $PVH_CS_SEL, $_pa(2f)
2:
	/* Turn paging and PAE back off before entering startup_32. */
	mov %cr0, %eax
	and $~X86_CR0_PG, %eax
	mov %eax, %cr0
	mov %cr4, %eax
	and $~X86_CR4_PAE, %eax
	mov %eax, %cr4

	ljmp $PVH_CS_SEL, $_pa(startup_32)
#endif
SYM_CODE_END(pvh_start_xen)

	.section ".init.data","aw"
	.balign 8
	/*
	 * GDT descriptor: 16-bit limit followed by 32-bit base.  The base
	 * field (offset 2) is patched at runtime by the code above.
	 */
SYM_DATA_START_LOCAL(gdt)
	.word gdt_end - gdt_start
	.long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
	.word 0
SYM_DATA_END(gdt)
	/* Minimal boot GDT: null, flat code (CS), flat data (DS). */
SYM_DATA_START_LOCAL(gdt_start)
	.quad 0x0000000000000000 /* NULL descriptor */
#ifdef CONFIG_X86_64
	.quad GDT_ENTRY(DESC_CODE64, 0, 0xfffff) /* PVH_CS_SEL */
#else
	.quad GDT_ENTRY(DESC_CODE32, 0, 0xfffff) /* PVH_CS_SEL */
#endif
	.quad GDT_ENTRY(DESC_DATA32, 0, 0xfffff) /* PVH_DS_SEL */
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)

	/* Backing store for the stack-protector canary (GS base above). */
	.balign 16
SYM_DATA_LOCAL(canary, .fill 48, 1, 0)

	/* Early boot stack; %esp is pointed at early_stack_end above. */
SYM_DATA_START_LOCAL(early_stack)
	.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)

#ifdef CONFIG_X86_64
/*
 * Xen PVH needs a set of identity mapped and kernel high mapping
 * page tables.  pvh_start_xen starts running on the identity mapped
 * page tables, but xen_prepare_pvh calls into the high mapping.
 * These page tables need to be relocatable and are only used until
 * startup_64 transitions to init_top_pgt.
 */
SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(pvh_init_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
	.quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.fill 511, 8, 0
SYM_DATA_END(pvh_level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
	/*
	 * Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 *
	 * Note: this sets _PAGE_GLOBAL regardless of whether
	 * the CPU supports it or it is enabled.  But,
	 * the CPU should ignore the bit.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(pvh_level2_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
	.fill L3_START_KERNEL, 8, 0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.quad 0 /* no fixmap */
SYM_DATA_END(pvh_level3_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
	/*
	 * Kernel high mapping.
	 *
	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
	 * 512 MiB otherwise.
	 *
	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
	 *
	 * This table is eventually used by the kernel during normal runtime.
	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
	 * or _PAGE_GLOBAL in some cases.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
SYM_DATA_END(pvh_level2_kernel_pgt)

	/* Tell Xen the alignment, preferred address and size constraints
	 * for relocating this image. */
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
		     .long CONFIG_PHYSICAL_ALIGN;
		     .long LOAD_PHYSICAL_ADDR;
		     .long KERNEL_IMAGE_SIZE - 1)
#endif

	/* Physical entry point advertised to the PVH loader. */
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
		     _ASM_PTR (pvh_start_xen - __START_KERNEL_map))