/*
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006,2008 Kip Macy
 * Copyright (c) 2013 Roger Pau Monné <roger.pau@citrix.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 201a9cdd37SRoger Pau Monné * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 211a9cdd37SRoger Pau Monné * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 221a9cdd37SRoger Pau Monné * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 231a9cdd37SRoger Pau Monné * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 241a9cdd37SRoger Pau Monné * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 251a9cdd37SRoger Pau Monné * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 261a9cdd37SRoger Pau Monné * SUCH DAMAGE. 271a9cdd37SRoger Pau Monné */ 281a9cdd37SRoger Pau Monné 291a9cdd37SRoger Pau Monné #include <sys/cdefs.h> 301a9cdd37SRoger Pau Monné __FBSDID("$FreeBSD$"); 311a9cdd37SRoger Pau Monné 321a9cdd37SRoger Pau Monné #include <sys/param.h> 331a9cdd37SRoger Pau Monné #include <sys/bus.h> 341a9cdd37SRoger Pau Monné #include <sys/kernel.h> 351a9cdd37SRoger Pau Monné #include <sys/reboot.h> 361a9cdd37SRoger Pau Monné #include <sys/systm.h> 37079f7ef8SRoger Pau Monné #include <sys/malloc.h> 381a9cdd37SRoger Pau Monné #include <sys/lock.h> 391a9cdd37SRoger Pau Monné #include <sys/rwlock.h> 40aa389b4fSRoger Pau Monné #include <sys/boot.h> 4197baeefdSRoger Pau Monné #include <sys/ctype.h> 42079f7ef8SRoger Pau Monné #include <sys/mutex.h> 43079f7ef8SRoger Pau Monné #include <sys/smp.h> 441a9cdd37SRoger Pau Monné 451a9cdd37SRoger Pau Monné #include <vm/vm.h> 461a9cdd37SRoger Pau Monné #include <vm/vm_extern.h> 471a9cdd37SRoger Pau Monné #include <vm/vm_kern.h> 481a9cdd37SRoger Pau Monné #include <vm/vm_page.h> 491a9cdd37SRoger Pau Monné #include <vm/vm_map.h> 501a9cdd37SRoger Pau Monné #include <vm/vm_object.h> 511a9cdd37SRoger Pau Monné #include <vm/vm_pager.h> 521a9cdd37SRoger Pau Monné #include <vm/vm_param.h> 531a9cdd37SRoger Pau Monné 54*fae92773SJohn Baldwin #include 
<machine/intr_machdep.h> 55*fae92773SJohn Baldwin #include <x86/apicvar.h> 5697baeefdSRoger Pau Monné #include <x86/init.h> 571e69553eSRoger Pau Monné #include <machine/pc/bios.h> 58079f7ef8SRoger Pau Monné #include <machine/smp.h> 5997baeefdSRoger Pau Monné 601a9cdd37SRoger Pau Monné #include <xen/xen-os.h> 611a9cdd37SRoger Pau Monné #include <xen/hypervisor.h> 62b7df74eeSWarner Losh #include <xen/xenstore/xenstorevar.h> 63842471b3SRoger Pau Monné #include <xen/xen_pv.h> 641a9cdd37SRoger Pau Monné 65079f7ef8SRoger Pau Monné #include <xen/interface/vcpu.h> 66079f7ef8SRoger Pau Monné 675f05c794SRoger Pau Monné #include <dev/xen/timer/timer.h> 685f05c794SRoger Pau Monné 691a9cdd37SRoger Pau Monné /* Native initial function */ 701a9cdd37SRoger Pau Monné extern u_int64_t hammer_time(u_int64_t, u_int64_t); 711a9cdd37SRoger Pau Monné /* Xen initial function */ 721a9cdd37SRoger Pau Monné uint64_t hammer_time_xen(start_info_t *, uint64_t); 731a9cdd37SRoger Pau Monné 741e69553eSRoger Pau Monné #define MAX_E820_ENTRIES 128 751e69553eSRoger Pau Monné 7697baeefdSRoger Pau Monné /*--------------------------- Forward Declarations ---------------------------*/ 7797baeefdSRoger Pau Monné static caddr_t xen_pv_parse_preload_data(u_int64_t); 781e69553eSRoger Pau Monné static void xen_pv_parse_memmap(caddr_t, vm_paddr_t *, int *); 7997baeefdSRoger Pau Monné 80079f7ef8SRoger Pau Monné #ifdef SMP 81079f7ef8SRoger Pau Monné static int xen_pv_start_all_aps(void); 82079f7ef8SRoger Pau Monné #endif 83079f7ef8SRoger Pau Monné 84079f7ef8SRoger Pau Monné /*---------------------------- Extern Declarations ---------------------------*/ 85079f7ef8SRoger Pau Monné #ifdef SMP 86079f7ef8SRoger Pau Monné /* Variables used by amd64 mp_machdep to start APs */ 87079f7ef8SRoger Pau Monné extern struct mtx ap_boot_mtx; 88079f7ef8SRoger Pau Monné extern void *bootstacks[]; 89079f7ef8SRoger Pau Monné extern char *doublefault_stack; 90079f7ef8SRoger Pau Monné extern char *nmi_stack; 91079f7ef8SRoger Pau 
Monné extern void *dpcpu; 92079f7ef8SRoger Pau Monné extern int bootAP; 93079f7ef8SRoger Pau Monné extern char *bootSTK; 94079f7ef8SRoger Pau Monné #endif 95079f7ef8SRoger Pau Monné 9697baeefdSRoger Pau Monné /*-------------------------------- Global Data -------------------------------*/ 9797baeefdSRoger Pau Monné /* Xen init_ops implementation. */ 9897baeefdSRoger Pau Monné struct init_ops xen_init_ops = { 9997baeefdSRoger Pau Monné .parse_preload_data = xen_pv_parse_preload_data, 1005f05c794SRoger Pau Monné .early_clock_source_init = xen_clock_init, 1015f05c794SRoger Pau Monné .early_delay = xen_delay, 1021e69553eSRoger Pau Monné .parse_memmap = xen_pv_parse_memmap, 103079f7ef8SRoger Pau Monné #ifdef SMP 104079f7ef8SRoger Pau Monné .start_all_aps = xen_pv_start_all_aps, 105079f7ef8SRoger Pau Monné #endif 10697baeefdSRoger Pau Monné }; 10797baeefdSRoger Pau Monné 1081e69553eSRoger Pau Monné static struct bios_smap xen_smap[MAX_E820_ENTRIES]; 1091e69553eSRoger Pau Monné 11097baeefdSRoger Pau Monné /*-------------------------------- Xen PV init -------------------------------*/ 1111a9cdd37SRoger Pau Monné /* 1121a9cdd37SRoger Pau Monné * First function called by the Xen PVH boot sequence. 1131a9cdd37SRoger Pau Monné * 1141a9cdd37SRoger Pau Monné * Set some Xen global variables and prepare the environment so it is 1151a9cdd37SRoger Pau Monné * as similar as possible to what native FreeBSD init function expects. 
1161a9cdd37SRoger Pau Monné */ 1171a9cdd37SRoger Pau Monné uint64_t 1181a9cdd37SRoger Pau Monné hammer_time_xen(start_info_t *si, uint64_t xenstack) 1191a9cdd37SRoger Pau Monné { 1201a9cdd37SRoger Pau Monné uint64_t physfree; 1211a9cdd37SRoger Pau Monné uint64_t *PT4 = (u_int64_t *)xenstack; 1221a9cdd37SRoger Pau Monné uint64_t *PT3 = (u_int64_t *)(xenstack + PAGE_SIZE); 1231a9cdd37SRoger Pau Monné uint64_t *PT2 = (u_int64_t *)(xenstack + 2 * PAGE_SIZE); 1241a9cdd37SRoger Pau Monné int i; 1251a9cdd37SRoger Pau Monné 1261a9cdd37SRoger Pau Monné xen_domain_type = XEN_PV_DOMAIN; 1271a9cdd37SRoger Pau Monné vm_guest = VM_GUEST_XEN; 1281a9cdd37SRoger Pau Monné 1291a9cdd37SRoger Pau Monné if ((si == NULL) || (xenstack == 0)) { 130c203fa69SRoger Pau Monné xc_printf("ERROR: invalid start_info or xen stack, halting\n"); 1311a9cdd37SRoger Pau Monné HYPERVISOR_shutdown(SHUTDOWN_crash); 1321a9cdd37SRoger Pau Monné } 1331a9cdd37SRoger Pau Monné 134c203fa69SRoger Pau Monné xc_printf("FreeBSD PVH running on %s\n", si->magic); 135c203fa69SRoger Pau Monné 1361a9cdd37SRoger Pau Monné /* We use 3 pages of xen stack for the boot pagetables */ 1371a9cdd37SRoger Pau Monné physfree = xenstack + 3 * PAGE_SIZE - KERNBASE; 1381a9cdd37SRoger Pau Monné 1391a9cdd37SRoger Pau Monné /* Setup Xen global variables */ 1401a9cdd37SRoger Pau Monné HYPERVISOR_start_info = si; 1411a9cdd37SRoger Pau Monné HYPERVISOR_shared_info = 1421a9cdd37SRoger Pau Monné (shared_info_t *)(si->shared_info + KERNBASE); 1431a9cdd37SRoger Pau Monné 1441a9cdd37SRoger Pau Monné /* 1451a9cdd37SRoger Pau Monné * Setup some misc global variables for Xen devices 1461a9cdd37SRoger Pau Monné * 1471a9cdd37SRoger Pau Monné * XXX: Devices that need these specific variables should 1481a9cdd37SRoger Pau Monné * be rewritten to fetch this info by themselves from the 1491a9cdd37SRoger Pau Monné * start_info page. 
1501a9cdd37SRoger Pau Monné */ 1511a9cdd37SRoger Pau Monné xen_store = (struct xenstore_domain_interface *) 1521a9cdd37SRoger Pau Monné (ptoa(si->store_mfn) + KERNBASE); 153c203fa69SRoger Pau Monné console_page = (char *)(ptoa(si->console.domU.mfn) + KERNBASE); 1541a9cdd37SRoger Pau Monné 1551a9cdd37SRoger Pau Monné /* 1561a9cdd37SRoger Pau Monné * Use the stack Xen gives us to build the page tables 1571a9cdd37SRoger Pau Monné * as native FreeBSD expects to find them (created 1581a9cdd37SRoger Pau Monné * by the boot trampoline). 1591a9cdd37SRoger Pau Monné */ 1601a9cdd37SRoger Pau Monné for (i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); i++) { 1615f35f84fSRoger Pau Monné /* 1625f35f84fSRoger Pau Monné * Each slot of the level 4 pages points 1635f35f84fSRoger Pau Monné * to the same level 3 page 1645f35f84fSRoger Pau Monné */ 1651a9cdd37SRoger Pau Monné PT4[i] = ((uint64_t)&PT3[0]) - KERNBASE; 1661a9cdd37SRoger Pau Monné PT4[i] |= PG_V | PG_RW | PG_U; 1671a9cdd37SRoger Pau Monné 1685f35f84fSRoger Pau Monné /* 1695f35f84fSRoger Pau Monné * Each slot of the level 3 pages points 1705f35f84fSRoger Pau Monné * to the same level 2 page 1715f35f84fSRoger Pau Monné */ 1721a9cdd37SRoger Pau Monné PT3[i] = ((uint64_t)&PT2[0]) - KERNBASE; 1731a9cdd37SRoger Pau Monné PT3[i] |= PG_V | PG_RW | PG_U; 1741a9cdd37SRoger Pau Monné 1755f35f84fSRoger Pau Monné /* 1765f35f84fSRoger Pau Monné * The level 2 page slots are mapped with 1775f35f84fSRoger Pau Monné * 2MB pages for 1GB. 
1785f35f84fSRoger Pau Monné */ 1791a9cdd37SRoger Pau Monné PT2[i] = i * (2 * 1024 * 1024); 1801a9cdd37SRoger Pau Monné PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; 1811a9cdd37SRoger Pau Monné } 1821a9cdd37SRoger Pau Monné load_cr3(((uint64_t)&PT4[0]) - KERNBASE); 1831a9cdd37SRoger Pau Monné 18497baeefdSRoger Pau Monné /* Set the hooks for early functions that diverge from bare metal */ 18597baeefdSRoger Pau Monné init_ops = xen_init_ops; 186842471b3SRoger Pau Monné apic_ops = xen_apic_ops; 18797baeefdSRoger Pau Monné 1881a9cdd37SRoger Pau Monné /* Now we can jump into the native init function */ 1891a9cdd37SRoger Pau Monné return (hammer_time(0, physfree)); 1901a9cdd37SRoger Pau Monné } 19197baeefdSRoger Pau Monné 19297baeefdSRoger Pau Monné /*-------------------------------- PV specific -------------------------------*/ 193079f7ef8SRoger Pau Monné #ifdef SMP 194079f7ef8SRoger Pau Monné static bool 195079f7ef8SRoger Pau Monné start_xen_ap(int cpu) 196079f7ef8SRoger Pau Monné { 197079f7ef8SRoger Pau Monné struct vcpu_guest_context *ctxt; 198079f7ef8SRoger Pau Monné int ms, cpus = mp_naps; 199079f7ef8SRoger Pau Monné const size_t stacksize = KSTACK_PAGES * PAGE_SIZE; 200079f7ef8SRoger Pau Monné 201079f7ef8SRoger Pau Monné /* allocate and set up an idle stack data page */ 202079f7ef8SRoger Pau Monné bootstacks[cpu] = 203079f7ef8SRoger Pau Monné (void *)kmem_malloc(kernel_arena, stacksize, M_WAITOK | M_ZERO); 204079f7ef8SRoger Pau Monné doublefault_stack = 205079f7ef8SRoger Pau Monné (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); 206079f7ef8SRoger Pau Monné nmi_stack = 207079f7ef8SRoger Pau Monné (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); 208079f7ef8SRoger Pau Monné dpcpu = 209079f7ef8SRoger Pau Monné (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); 210079f7ef8SRoger Pau Monné 211079f7ef8SRoger Pau Monné bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; 212079f7ef8SRoger Pau Monné bootAP = cpu; 
213079f7ef8SRoger Pau Monné 214079f7ef8SRoger Pau Monné ctxt = malloc(sizeof(*ctxt), M_TEMP, M_WAITOK | M_ZERO); 215079f7ef8SRoger Pau Monné if (ctxt == NULL) 216079f7ef8SRoger Pau Monné panic("unable to allocate memory"); 217079f7ef8SRoger Pau Monné 218079f7ef8SRoger Pau Monné ctxt->flags = VGCF_IN_KERNEL; 219079f7ef8SRoger Pau Monné ctxt->user_regs.rip = (unsigned long) init_secondary; 220079f7ef8SRoger Pau Monné ctxt->user_regs.rsp = (unsigned long) bootSTK; 221079f7ef8SRoger Pau Monné 222079f7ef8SRoger Pau Monné /* Set the AP to use the same page tables */ 223079f7ef8SRoger Pau Monné ctxt->ctrlreg[3] = KPML4phys; 224079f7ef8SRoger Pau Monné 225079f7ef8SRoger Pau Monné if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) 226079f7ef8SRoger Pau Monné panic("unable to initialize AP#%d", cpu); 227079f7ef8SRoger Pau Monné 228079f7ef8SRoger Pau Monné free(ctxt, M_TEMP); 229079f7ef8SRoger Pau Monné 230079f7ef8SRoger Pau Monné /* Launch the vCPU */ 231079f7ef8SRoger Pau Monné if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) 232079f7ef8SRoger Pau Monné panic("unable to start AP#%d", cpu); 233079f7ef8SRoger Pau Monné 234079f7ef8SRoger Pau Monné /* Wait up to 5 seconds for it to start. 
*/ 235079f7ef8SRoger Pau Monné for (ms = 0; ms < 5000; ms++) { 236079f7ef8SRoger Pau Monné if (mp_naps > cpus) 237079f7ef8SRoger Pau Monné return (true); 238079f7ef8SRoger Pau Monné DELAY(1000); 239079f7ef8SRoger Pau Monné } 240079f7ef8SRoger Pau Monné 241079f7ef8SRoger Pau Monné return (false); 242079f7ef8SRoger Pau Monné } 243079f7ef8SRoger Pau Monné 244079f7ef8SRoger Pau Monné static int 245079f7ef8SRoger Pau Monné xen_pv_start_all_aps(void) 246079f7ef8SRoger Pau Monné { 247079f7ef8SRoger Pau Monné int cpu; 248079f7ef8SRoger Pau Monné 249079f7ef8SRoger Pau Monné mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 250079f7ef8SRoger Pau Monné 251079f7ef8SRoger Pau Monné for (cpu = 1; cpu < mp_ncpus; cpu++) { 252079f7ef8SRoger Pau Monné 253079f7ef8SRoger Pau Monné /* attempt to start the Application Processor */ 254079f7ef8SRoger Pau Monné if (!start_xen_ap(cpu)) 255079f7ef8SRoger Pau Monné panic("AP #%d failed to start!", cpu); 256079f7ef8SRoger Pau Monné 257079f7ef8SRoger Pau Monné CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 258079f7ef8SRoger Pau Monné } 259079f7ef8SRoger Pau Monné 260079f7ef8SRoger Pau Monné return (mp_naps); 261079f7ef8SRoger Pau Monné } 262079f7ef8SRoger Pau Monné #endif /* SMP */ 263079f7ef8SRoger Pau Monné 26497baeefdSRoger Pau Monné /* 26597baeefdSRoger Pau Monné * Functions to convert the "extra" parameters passed by Xen 26697baeefdSRoger Pau Monné * into FreeBSD boot options. 
26797baeefdSRoger Pau Monné */ 26897baeefdSRoger Pau Monné static void 26997baeefdSRoger Pau Monné xen_pv_set_env(void) 27097baeefdSRoger Pau Monné { 27197baeefdSRoger Pau Monné char *cmd_line_next, *cmd_line; 27297baeefdSRoger Pau Monné size_t env_size; 27397baeefdSRoger Pau Monné 27497baeefdSRoger Pau Monné cmd_line = HYPERVISOR_start_info->cmd_line; 27597baeefdSRoger Pau Monné env_size = sizeof(HYPERVISOR_start_info->cmd_line); 27697baeefdSRoger Pau Monné 27797baeefdSRoger Pau Monné /* Skip leading spaces */ 27897baeefdSRoger Pau Monné for (; isspace(*cmd_line) && (env_size != 0); cmd_line++) 27997baeefdSRoger Pau Monné env_size--; 28097baeefdSRoger Pau Monné 28197baeefdSRoger Pau Monné /* Replace ',' with '\0' */ 28297baeefdSRoger Pau Monné for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;) 28397baeefdSRoger Pau Monné ; 28497baeefdSRoger Pau Monné 28597baeefdSRoger Pau Monné init_static_kenv(cmd_line, env_size); 28697baeefdSRoger Pau Monné } 28797baeefdSRoger Pau Monné 28897baeefdSRoger Pau Monné static void 28997baeefdSRoger Pau Monné xen_pv_set_boothowto(void) 29097baeefdSRoger Pau Monné { 29197baeefdSRoger Pau Monné int i; 29297baeefdSRoger Pau Monné 29397baeefdSRoger Pau Monné /* get equivalents from the environment */ 29497baeefdSRoger Pau Monné for (i = 0; howto_names[i].ev != NULL; i++) { 29597baeefdSRoger Pau Monné if (getenv(howto_names[i].ev) != NULL) 29697baeefdSRoger Pau Monné boothowto |= howto_names[i].mask; 29797baeefdSRoger Pau Monné } 29897baeefdSRoger Pau Monné } 29997baeefdSRoger Pau Monné 30097baeefdSRoger Pau Monné static caddr_t 30197baeefdSRoger Pau Monné xen_pv_parse_preload_data(u_int64_t modulep) 30297baeefdSRoger Pau Monné { 30397baeefdSRoger Pau Monné /* Parse the extra boot information given by Xen */ 30497baeefdSRoger Pau Monné xen_pv_set_env(); 30597baeefdSRoger Pau Monné xen_pv_set_boothowto(); 30697baeefdSRoger Pau Monné 30797baeefdSRoger Pau Monné return (NULL); 30897baeefdSRoger Pau Monné } 3091e69553eSRoger 
Pau Monné 3101e69553eSRoger Pau Monné static void 3111e69553eSRoger Pau Monné xen_pv_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) 3121e69553eSRoger Pau Monné { 3131e69553eSRoger Pau Monné struct xen_memory_map memmap; 3141e69553eSRoger Pau Monné u_int32_t size; 3151e69553eSRoger Pau Monné int rc; 3161e69553eSRoger Pau Monné 3171e69553eSRoger Pau Monné /* Fetch the E820 map from Xen */ 3181e69553eSRoger Pau Monné memmap.nr_entries = MAX_E820_ENTRIES; 3191e69553eSRoger Pau Monné set_xen_guest_handle(memmap.buffer, xen_smap); 3201e69553eSRoger Pau Monné rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); 3211e69553eSRoger Pau Monné if (rc) 3221e69553eSRoger Pau Monné panic("unable to fetch Xen E820 memory map"); 3231e69553eSRoger Pau Monné size = memmap.nr_entries * sizeof(xen_smap[0]); 3241e69553eSRoger Pau Monné 3251e69553eSRoger Pau Monné bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); 3261e69553eSRoger Pau Monné } 327