1843e1988Sjohnlev /* 2843e1988Sjohnlev * CDDL HEADER START 3843e1988Sjohnlev * 4843e1988Sjohnlev * The contents of this file are subject to the terms of the 5843e1988Sjohnlev * Common Development and Distribution License (the "License"). 6843e1988Sjohnlev * You may not use this file except in compliance with the License. 7843e1988Sjohnlev * 8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing. 10843e1988Sjohnlev * See the License for the specific language governing permissions 11843e1988Sjohnlev * and limitations under the License. 12843e1988Sjohnlev * 13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18843e1988Sjohnlev * 19843e1988Sjohnlev * CDDL HEADER END 20843e1988Sjohnlev */ 21843e1988Sjohnlev 22843e1988Sjohnlev /* 23*349b53ddSStuart Maybee * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24843e1988Sjohnlev * Use is subject to license terms. 25843e1988Sjohnlev */ 26843e1988Sjohnlev 27843e1988Sjohnlev /* derived from netbsd's xen_machdep.c 1.1.2.1 */ 28843e1988Sjohnlev 29843e1988Sjohnlev /* 30843e1988Sjohnlev * 31843e1988Sjohnlev * Copyright (c) 2004 Christian Limpach. 32843e1988Sjohnlev * All rights reserved. 33843e1988Sjohnlev * 34843e1988Sjohnlev * Redistribution and use in source and binary forms, with or without 35843e1988Sjohnlev * modification, are permitted provided that the following conditions 36843e1988Sjohnlev * are met: 37843e1988Sjohnlev * 1. Redistributions of source code must retain the above copyright 38843e1988Sjohnlev * notice, this list of conditions and the following disclaimer. 39843e1988Sjohnlev * 2. Redistributions in binary form must reproduce the above copyright 40843e1988Sjohnlev * notice, this list of conditions and the following disclaimer in the 41843e1988Sjohnlev * documentation and/or other materials provided with the distribution. 42843e1988Sjohnlev * 3. This section intentionally left blank. 43843e1988Sjohnlev * 4. The name of the author may not be used to endorse or promote products 44843e1988Sjohnlev * derived from this software without specific prior written permission. 45843e1988Sjohnlev * 46843e1988Sjohnlev * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 47843e1988Sjohnlev * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 48843e1988Sjohnlev * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 49843e1988Sjohnlev * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50843e1988Sjohnlev * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 51843e1988Sjohnlev * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 52843e1988Sjohnlev * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 53843e1988Sjohnlev * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 54843e1988Sjohnlev * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 55843e1988Sjohnlev * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56843e1988Sjohnlev */ 57843e1988Sjohnlev /* 58843e1988Sjohnlev * Section 3 of the above license was updated in response to bug 6379571. 59843e1988Sjohnlev */ 60843e1988Sjohnlev 61*349b53ddSStuart Maybee #include <sys/xpv_user.h> 62*349b53ddSStuart Maybee 63*349b53ddSStuart Maybee /* XXX 3.3. TODO remove this include */ 64*349b53ddSStuart Maybee #include <xen/public/arch-x86/xen-mca.h> 65*349b53ddSStuart Maybee 669e839ce9Sgarypen #include <sys/ctype.h> 67843e1988Sjohnlev #include <sys/types.h> 68843e1988Sjohnlev #include <sys/cmn_err.h> 69843e1988Sjohnlev #include <sys/trap.h> 70843e1988Sjohnlev #include <sys/segments.h> 71843e1988Sjohnlev #include <sys/hypervisor.h> 72843e1988Sjohnlev #include <sys/xen_mmu.h> 73843e1988Sjohnlev #include <sys/machsystm.h> 74843e1988Sjohnlev #include <sys/promif.h> 75843e1988Sjohnlev #include <sys/bootconf.h> 76843e1988Sjohnlev #include <sys/bootinfo.h> 77843e1988Sjohnlev #include <sys/cpr.h> 78843e1988Sjohnlev #include <sys/taskq.h> 79843e1988Sjohnlev #include <sys/uadmin.h> 80843e1988Sjohnlev #include <sys/evtchn_impl.h> 81843e1988Sjohnlev #include <sys/archsystm.h> 82843e1988Sjohnlev #include <xen/sys/xenbus_impl.h> 83843e1988Sjohnlev #include <sys/mach_mmu.h> 84843e1988Sjohnlev #include <vm/hat_i86.h> 85843e1988Sjohnlev #include <sys/gnttab.h> 86843e1988Sjohnlev #include <sys/reboot.h> 87843e1988Sjohnlev #include <sys/stack.h> 88843e1988Sjohnlev #include <sys/clock.h> 89843e1988Sjohnlev #include <sys/bitmap.h> 90843e1988Sjohnlev #include <sys/processor.h> 91843e1988Sjohnlev #include <sys/xen_errno.h> 92843e1988Sjohnlev #include <sys/xpv_panic.h> 93843e1988Sjohnlev #include <sys/smp_impldefs.h> 94843e1988Sjohnlev #include <sys/cpu.h> 95843e1988Sjohnlev #include <sys/balloon_impl.h> 96843e1988Sjohnlev #include <sys/ddi.h> 97843e1988Sjohnlev 98843e1988Sjohnlev #ifdef DEBUG 99843e1988Sjohnlev #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf 100843e1988Sjohnlev #else 101843e1988Sjohnlev #define SUSPEND_DEBUG(...) 102843e1988Sjohnlev #endif 103843e1988Sjohnlev 104843e1988Sjohnlev int cpr_debug; 105843e1988Sjohnlev cpuset_t cpu_suspend_lost_set; 106843e1988Sjohnlev static int xen_suspend_debug; 107843e1988Sjohnlev 108e4b86885SCheng Sean Ye uint_t xen_phys_ncpus; 109e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xen_phys_cpus; 110e4b86885SCheng Sean Ye int xen_physinfo_debug = 0; 111e4b86885SCheng Sean Ye 1129e839ce9Sgarypen /* 1139e839ce9Sgarypen * Determine helpful version information. 1149e839ce9Sgarypen * 1159e839ce9Sgarypen * (And leave copies in the data segment so we can look at them later 1169e839ce9Sgarypen * with e.g. kmdb.) 1179e839ce9Sgarypen */ 1189e839ce9Sgarypen 1199e839ce9Sgarypen typedef enum xen_version { 1209e839ce9Sgarypen XENVER_BOOT_IDX, 1219e839ce9Sgarypen XENVER_CURRENT_IDX 1229e839ce9Sgarypen } xen_version_t; 1239e839ce9Sgarypen 1249e839ce9Sgarypen struct xenver { 1259e839ce9Sgarypen ulong_t xv_major; 1269e839ce9Sgarypen ulong_t xv_minor; 1279e839ce9Sgarypen ulong_t xv_revision; 1289e839ce9Sgarypen xen_extraversion_t xv_ver; 129ab4a9bebSjohnlev ulong_t xv_is_xvm; 1309e839ce9Sgarypen xen_changeset_info_t xv_chgset; 1319e839ce9Sgarypen xen_compile_info_t xv_build; 1329e839ce9Sgarypen xen_capabilities_info_t xv_caps; 1339e839ce9Sgarypen } xenver[2]; 1349e839ce9Sgarypen 1359e839ce9Sgarypen #define XENVER_BOOT(m) (xenver[XENVER_BOOT_IDX].m) 1369e839ce9Sgarypen #define XENVER_CURRENT(m) (xenver[XENVER_CURRENT_IDX].m) 1379e839ce9Sgarypen 1389e839ce9Sgarypen /* 1399e839ce9Sgarypen * Update the xenver data. We maintain two copies, boot and 1409e839ce9Sgarypen * current. If we are setting the boot, then also set current. 1419e839ce9Sgarypen */ 1429e839ce9Sgarypen static void 1439e839ce9Sgarypen xen_set_version(xen_version_t idx) 1449e839ce9Sgarypen { 1459e839ce9Sgarypen ulong_t ver; 1469e839ce9Sgarypen 1479e839ce9Sgarypen bzero(&xenver[idx], sizeof (xenver[idx])); 1489e839ce9Sgarypen 1499e839ce9Sgarypen ver = HYPERVISOR_xen_version(XENVER_version, 0); 1509e839ce9Sgarypen 1519e839ce9Sgarypen xenver[idx].xv_major = BITX(ver, 31, 16); 1529e839ce9Sgarypen xenver[idx].xv_minor = BITX(ver, 15, 0); 1539e839ce9Sgarypen 1549e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver); 1559e839ce9Sgarypen 1569e839ce9Sgarypen /* 1579e839ce9Sgarypen * The revision is buried in the extraversion information that is 1589e839ce9Sgarypen * maintained by the hypervisor. For our purposes we expect that 1599e839ce9Sgarypen * the revision number is: 1609e839ce9Sgarypen * - the second character in the extraversion information 1619e839ce9Sgarypen * - one character long 1629e839ce9Sgarypen * - numeric digit 1639e839ce9Sgarypen * If it isn't then we can't extract the revision and we leave it 1649e839ce9Sgarypen * set to 0. 1659e839ce9Sgarypen */ 1669e839ce9Sgarypen if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1])) 1679e839ce9Sgarypen xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0'; 1689e839ce9Sgarypen else 1699e839ce9Sgarypen cmn_err(CE_WARN, "Cannot extract revision on this hypervisor " 1709e839ce9Sgarypen "version: v%s, unexpected version format", 1719e839ce9Sgarypen xenver[idx].xv_ver); 1729e839ce9Sgarypen 173ab4a9bebSjohnlev xenver[idx].xv_is_xvm = 0; 174ab4a9bebSjohnlev 175ab4a9bebSjohnlev if (strlen(xenver[idx].xv_ver) >= 4 && 176ab4a9bebSjohnlev strncmp(xenver[idx].xv_ver + strlen(xenver[idx].xv_ver) - 4, 177ab4a9bebSjohnlev "-xvm", 4) == 0) 178ab4a9bebSjohnlev xenver[idx].xv_is_xvm = 1; 179ab4a9bebSjohnlev 1809e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_changeset, 1819e839ce9Sgarypen &xenver[idx].xv_chgset); 1829e839ce9Sgarypen 1839e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_compile_info, 1849e839ce9Sgarypen &xenver[idx].xv_build); 1859e839ce9Sgarypen /* 1869e839ce9Sgarypen * Capabilities are a set of space separated ascii strings 1879e839ce9Sgarypen * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64' 1889e839ce9Sgarypen */ 1899e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_capabilities, 1909e839ce9Sgarypen &xenver[idx].xv_caps); 1919e839ce9Sgarypen 1929e839ce9Sgarypen cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major, 1939e839ce9Sgarypen xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset); 1949e839ce9Sgarypen 1959e839ce9Sgarypen if (idx == XENVER_BOOT_IDX) 1969e839ce9Sgarypen bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX], 1979e839ce9Sgarypen sizeof (xenver[XENVER_BOOT_IDX])); 1989e839ce9Sgarypen } 1999e839ce9Sgarypen 2009e839ce9Sgarypen typedef enum xen_hypervisor_check { 2019e839ce9Sgarypen XEN_RUN_CHECK, 2029e839ce9Sgarypen XEN_SUSPEND_CHECK 2039e839ce9Sgarypen } xen_hypervisor_check_t; 2049e839ce9Sgarypen 2059e839ce9Sgarypen /* 2069e839ce9Sgarypen * To run the hypervisor must be 3.0.4 or better. To suspend/resume 2079e839ce9Sgarypen * we need 3.0.4 or better and if it is 3.0.4. then it must be provided 2089e839ce9Sgarypen * by the Solaris xVM project. 2099e839ce9Sgarypen * Checking can be disabled for testing purposes by setting the 2109e839ce9Sgarypen * xen_suspend_debug variable. 2119e839ce9Sgarypen */ 2129e839ce9Sgarypen static int 2139e839ce9Sgarypen xen_hypervisor_supports_solaris(xen_hypervisor_check_t check) 2149e839ce9Sgarypen { 2159e839ce9Sgarypen if (xen_suspend_debug == 1) 2169e839ce9Sgarypen return (1); 2179e839ce9Sgarypen if (XENVER_CURRENT(xv_major) < 3) 2189e839ce9Sgarypen return (0); 2199e839ce9Sgarypen if (XENVER_CURRENT(xv_major) > 3) 2209e839ce9Sgarypen return (1); 2219e839ce9Sgarypen if (XENVER_CURRENT(xv_minor) > 0) 2229e839ce9Sgarypen return (1); 2239e839ce9Sgarypen if (XENVER_CURRENT(xv_revision) < 4) 2249e839ce9Sgarypen return (0); 225ab4a9bebSjohnlev if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 && 226ab4a9bebSjohnlev !XENVER_CURRENT(xv_is_xvm)) 2279e839ce9Sgarypen return (0); 228ab4a9bebSjohnlev 2299e839ce9Sgarypen return (1); 2309e839ce9Sgarypen } 2319e839ce9Sgarypen 232ab4a9bebSjohnlev /* 233ab4a9bebSjohnlev * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the 234ab4a9bebSjohnlev * workaround. 235ab4a9bebSjohnlev */ 236ab4a9bebSjohnlev static void 237ab4a9bebSjohnlev xen_pte_workaround(void) 238ab4a9bebSjohnlev { 239ab4a9bebSjohnlev #if defined(__amd64) 240ab4a9bebSjohnlev extern int pt_kern; 241ab4a9bebSjohnlev 242ab4a9bebSjohnlev if (XENVER_CURRENT(xv_major) != 3) 243ab4a9bebSjohnlev return; 244ab4a9bebSjohnlev if (XENVER_CURRENT(xv_minor) > 1) 245ab4a9bebSjohnlev return; 246ab4a9bebSjohnlev if (XENVER_CURRENT(xv_minor) == 1 && 247ab4a9bebSjohnlev XENVER_CURRENT(xv_revision) > 1) 248ab4a9bebSjohnlev return; 249ab4a9bebSjohnlev if (XENVER_CURRENT(xv_is_xvm)) 250ab4a9bebSjohnlev return; 251ab4a9bebSjohnlev 252ab4a9bebSjohnlev pt_kern = PT_USER; 253ab4a9bebSjohnlev #endif 254ab4a9bebSjohnlev } 255ab4a9bebSjohnlev 256843e1988Sjohnlev void 257843e1988Sjohnlev xen_set_callback(void (*func)(void), uint_t type, uint_t flags) 258843e1988Sjohnlev { 259843e1988Sjohnlev struct callback_register cb; 260843e1988Sjohnlev 261843e1988Sjohnlev bzero(&cb, sizeof (cb)); 262843e1988Sjohnlev #if defined(__amd64) 263843e1988Sjohnlev cb.address = (ulong_t)func; 264843e1988Sjohnlev #elif defined(__i386) 265843e1988Sjohnlev cb.address.cs = KCS_SEL; 266843e1988Sjohnlev cb.address.eip = (ulong_t)func; 267843e1988Sjohnlev #endif 268843e1988Sjohnlev cb.type = type; 269843e1988Sjohnlev cb.flags = flags; 270843e1988Sjohnlev 271843e1988Sjohnlev /* 272843e1988Sjohnlev * XXPV always ignore return value for NMI 273843e1988Sjohnlev */ 274843e1988Sjohnlev if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 && 275843e1988Sjohnlev type != CALLBACKTYPE_nmi) 276843e1988Sjohnlev panic("HYPERVISOR_callback_op failed"); 277843e1988Sjohnlev } 278843e1988Sjohnlev 279843e1988Sjohnlev void 280843e1988Sjohnlev xen_init_callbacks(void) 281843e1988Sjohnlev { 282843e1988Sjohnlev /* 283843e1988Sjohnlev * register event (interrupt) handler. 284843e1988Sjohnlev */ 285843e1988Sjohnlev xen_set_callback(xen_callback, CALLBACKTYPE_event, 0); 286843e1988Sjohnlev 287843e1988Sjohnlev /* 288843e1988Sjohnlev * failsafe handler. 289843e1988Sjohnlev */ 290843e1988Sjohnlev xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe, 291843e1988Sjohnlev CALLBACKF_mask_events); 292843e1988Sjohnlev 293843e1988Sjohnlev /* 294843e1988Sjohnlev * NMI handler. 295843e1988Sjohnlev */ 296843e1988Sjohnlev xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0); 297843e1988Sjohnlev 298843e1988Sjohnlev /* 299843e1988Sjohnlev * system call handler 300843e1988Sjohnlev * XXPV move to init_cpu_syscall? 301843e1988Sjohnlev */ 302843e1988Sjohnlev #if defined(__amd64) 303843e1988Sjohnlev xen_set_callback(sys_syscall, CALLBACKTYPE_syscall, 304843e1988Sjohnlev CALLBACKF_mask_events); 305843e1988Sjohnlev #endif /* __amd64 */ 306843e1988Sjohnlev } 307843e1988Sjohnlev 308843e1988Sjohnlev 309843e1988Sjohnlev /* 310843e1988Sjohnlev * cmn_err() followed by a 1/4 second delay; this gives the 311843e1988Sjohnlev * logging service a chance to flush messages and helps avoid 312843e1988Sjohnlev * intermixing output from prom_printf(). 313843e1988Sjohnlev * XXPV: doesn't exactly help us on UP though. 314843e1988Sjohnlev */ 315843e1988Sjohnlev /*PRINTFLIKE2*/ 316843e1988Sjohnlev void 317843e1988Sjohnlev cpr_err(int ce, const char *fmt, ...) 318843e1988Sjohnlev { 319843e1988Sjohnlev va_list adx; 320843e1988Sjohnlev 321843e1988Sjohnlev va_start(adx, fmt); 322843e1988Sjohnlev vcmn_err(ce, fmt, adx); 323843e1988Sjohnlev va_end(adx); 324843e1988Sjohnlev drv_usecwait(MICROSEC >> 2); 325843e1988Sjohnlev } 326843e1988Sjohnlev 327843e1988Sjohnlev void 328843e1988Sjohnlev xen_suspend_devices(void) 329843e1988Sjohnlev { 330843e1988Sjohnlev int rc; 331843e1988Sjohnlev 332843e1988Sjohnlev SUSPEND_DEBUG("xen_suspend_devices\n"); 333843e1988Sjohnlev 334843e1988Sjohnlev if ((rc = cpr_suspend_devices(ddi_root_node())) != 0) 335843e1988Sjohnlev panic("failed to suspend devices: %d", rc); 336843e1988Sjohnlev } 337843e1988Sjohnlev 338843e1988Sjohnlev void 339843e1988Sjohnlev xen_resume_devices(void) 340843e1988Sjohnlev { 341843e1988Sjohnlev int rc; 342843e1988Sjohnlev 343843e1988Sjohnlev SUSPEND_DEBUG("xen_resume_devices\n"); 344843e1988Sjohnlev 345843e1988Sjohnlev if ((rc = cpr_resume_devices(ddi_root_node(), 0)) != 0) 346843e1988Sjohnlev panic("failed to resume devices: %d", rc); 347843e1988Sjohnlev } 348843e1988Sjohnlev 349843e1988Sjohnlev /* 350843e1988Sjohnlev * The list of mfn pages is out of date. Recompute it. 351843e1988Sjohnlev */ 352843e1988Sjohnlev static void 353843e1988Sjohnlev rebuild_mfn_list(void) 354843e1988Sjohnlev { 355843e1988Sjohnlev int i = 0; 356843e1988Sjohnlev size_t sz; 357843e1988Sjohnlev size_t off; 358843e1988Sjohnlev pfn_t pfn; 359843e1988Sjohnlev 360843e1988Sjohnlev SUSPEND_DEBUG("rebuild_mfn_list\n"); 361843e1988Sjohnlev 362843e1988Sjohnlev sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK; 363843e1988Sjohnlev 364843e1988Sjohnlev for (off = 0; off < sz; off += MMU_PAGESIZE) { 365843e1988Sjohnlev size_t j = mmu_btop(off); 366843e1988Sjohnlev if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) { 367843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, 368843e1988Sjohnlev (caddr_t)&mfn_list_pages[j]); 369843e1988Sjohnlev mfn_list_pages_page[i++] = pfn_to_mfn(pfn); 370843e1988Sjohnlev } 371843e1988Sjohnlev 372843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off); 373843e1988Sjohnlev mfn_list_pages[j] = pfn_to_mfn(pfn); 374843e1988Sjohnlev } 375843e1988Sjohnlev 376843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page); 377843e1988Sjohnlev HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list 378843e1988Sjohnlev = pfn_to_mfn(pfn); 379843e1988Sjohnlev } 380843e1988Sjohnlev 381843e1988Sjohnlev static void 382843e1988Sjohnlev suspend_cpus(void) 383843e1988Sjohnlev { 384843e1988Sjohnlev int i; 385843e1988Sjohnlev 386843e1988Sjohnlev SUSPEND_DEBUG("suspend_cpus\n"); 387843e1988Sjohnlev 3881d03c31eSjohnlev mp_enter_barrier(); 389843e1988Sjohnlev 390843e1988Sjohnlev for (i = 1; i < ncpus; i++) { 391843e1988Sjohnlev if (!CPU_IN_SET(cpu_suspend_lost_set, i)) { 392843e1988Sjohnlev SUSPEND_DEBUG("xen_vcpu_down %d\n", i); 393843e1988Sjohnlev (void) xen_vcpu_down(i); 394843e1988Sjohnlev } 395843e1988Sjohnlev 396843e1988Sjohnlev mach_cpucontext_reset(cpu[i]); 397843e1988Sjohnlev } 398843e1988Sjohnlev } 399843e1988Sjohnlev 400843e1988Sjohnlev static void 401843e1988Sjohnlev resume_cpus(void) 402843e1988Sjohnlev { 403843e1988Sjohnlev int i; 404843e1988Sjohnlev 405843e1988Sjohnlev for (i = 1; i < ncpus; i++) { 406843e1988Sjohnlev if (cpu[i] == NULL) 407843e1988Sjohnlev continue; 408843e1988Sjohnlev 409843e1988Sjohnlev if (!CPU_IN_SET(cpu_suspend_lost_set, i)) { 410843e1988Sjohnlev SUSPEND_DEBUG("xen_vcpu_up %d\n", i); 411843e1988Sjohnlev mach_cpucontext_restore(cpu[i]); 412843e1988Sjohnlev (void) xen_vcpu_up(i); 413843e1988Sjohnlev } 414843e1988Sjohnlev } 415843e1988Sjohnlev 4161d03c31eSjohnlev mp_leave_barrier(); 417843e1988Sjohnlev } 418843e1988Sjohnlev 419843e1988Sjohnlev /* 420843e1988Sjohnlev * Top level routine to direct suspend/resume of a domain. 421843e1988Sjohnlev */ 422843e1988Sjohnlev void 423843e1988Sjohnlev xen_suspend_domain(void) 424843e1988Sjohnlev { 425843e1988Sjohnlev extern void rtcsync(void); 426843e1988Sjohnlev extern hrtime_t hres_last_tick; 427843e1988Sjohnlev mfn_t start_info_mfn; 428843e1988Sjohnlev ulong_t flags; 429843e1988Sjohnlev pfn_t pfn; 430843e1988Sjohnlev int i; 431843e1988Sjohnlev 432843e1988Sjohnlev /* 4339e839ce9Sgarypen * Check that we are happy to suspend on this hypervisor. 4349e839ce9Sgarypen */ 4359e839ce9Sgarypen if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) { 4369e839ce9Sgarypen cpr_err(CE_WARN, "Cannot suspend on this hypervisor " 4379e839ce9Sgarypen "version: v%lu.%lu%s, need at least version v3.0.4 or " 4389e839ce9Sgarypen "-xvm based hypervisor", XENVER_CURRENT(xv_major), 4399e839ce9Sgarypen XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver)); 4409e839ce9Sgarypen return; 4419e839ce9Sgarypen } 4429e839ce9Sgarypen 4439e839ce9Sgarypen /* 444843e1988Sjohnlev * XXPV - Are we definitely OK to suspend by the time we've connected 445843e1988Sjohnlev * the handler? 446843e1988Sjohnlev */ 447843e1988Sjohnlev 448843e1988Sjohnlev cpr_err(CE_NOTE, "Domain suspending for save/migrate"); 449843e1988Sjohnlev 450843e1988Sjohnlev SUSPEND_DEBUG("xen_suspend_domain\n"); 451843e1988Sjohnlev 452843e1988Sjohnlev /* 453843e1988Sjohnlev * suspend interrupts and devices 454843e1988Sjohnlev * XXPV - we use suspend/resume for both save/restore domains (like sun 455843e1988Sjohnlev * cpr) and for migration. Would be nice to know the difference if 456843e1988Sjohnlev * possible. For save/restore where down time may be a long time, we 457843e1988Sjohnlev * may want to do more of the things that cpr does. (i.e. notify user 458843e1988Sjohnlev * processes, shrink memory footprint for faster restore, etc.) 459843e1988Sjohnlev */ 460843e1988Sjohnlev xen_suspend_devices(); 461843e1988Sjohnlev SUSPEND_DEBUG("xenbus_suspend\n"); 462843e1988Sjohnlev xenbus_suspend(); 463843e1988Sjohnlev 464843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info); 465843e1988Sjohnlev start_info_mfn = pfn_to_mfn(pfn); 466843e1988Sjohnlev 467843e1988Sjohnlev /* 468843e1988Sjohnlev * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe 469843e1988Sjohnlev * wrt xenbus being suspended here? 470843e1988Sjohnlev */ 471843e1988Sjohnlev mutex_enter(&cpu_lock); 472843e1988Sjohnlev 473843e1988Sjohnlev /* 474843e1988Sjohnlev * Suspend must be done on vcpu 0, as no context for other CPUs is 475843e1988Sjohnlev * saved. 476843e1988Sjohnlev * 477843e1988Sjohnlev * XXPV - add to taskq API ? 478843e1988Sjohnlev */ 479843e1988Sjohnlev thread_affinity_set(curthread, 0); 480843e1988Sjohnlev kpreempt_disable(); 481843e1988Sjohnlev 482843e1988Sjohnlev SUSPEND_DEBUG("xen_start_migrate\n"); 483843e1988Sjohnlev xen_start_migrate(); 484843e1988Sjohnlev if (ncpus > 1) 485843e1988Sjohnlev suspend_cpus(); 486843e1988Sjohnlev 487843e1988Sjohnlev /* 488843e1988Sjohnlev * We can grab the ec_lock as it's a spinlock with a high SPL. Hence 489843e1988Sjohnlev * any holder would have dropped it to get through suspend_cpus(). 490843e1988Sjohnlev */ 491843e1988Sjohnlev mutex_enter(&ec_lock); 492843e1988Sjohnlev 493843e1988Sjohnlev /* 494843e1988Sjohnlev * From here on in, we can't take locks. 495843e1988Sjohnlev */ 496843e1988Sjohnlev SUSPEND_DEBUG("ec_suspend\n"); 497843e1988Sjohnlev ec_suspend(); 498843e1988Sjohnlev SUSPEND_DEBUG("gnttab_suspend\n"); 499843e1988Sjohnlev gnttab_suspend(); 500843e1988Sjohnlev 501843e1988Sjohnlev flags = intr_clear(); 502843e1988Sjohnlev 503843e1988Sjohnlev xpv_time_suspend(); 504843e1988Sjohnlev 505843e1988Sjohnlev /* 506843e1988Sjohnlev * Currently, the hypervisor incorrectly fails to bring back 507843e1988Sjohnlev * powered-down VCPUs. Thus we need to record any powered-down VCPUs 508843e1988Sjohnlev * to prevent any attempts to operate on them. But we have to do this 509843e1988Sjohnlev * *after* the very first time we do ec_suspend(). 510843e1988Sjohnlev */ 511843e1988Sjohnlev for (i = 1; i < ncpus; i++) { 512843e1988Sjohnlev if (cpu[i] == NULL) 513843e1988Sjohnlev continue; 514843e1988Sjohnlev 515843e1988Sjohnlev if (cpu_get_state(cpu[i]) == P_POWEROFF) 516843e1988Sjohnlev CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i); 517843e1988Sjohnlev } 518843e1988Sjohnlev 519843e1988Sjohnlev /* 520843e1988Sjohnlev * The dom0 save/migrate code doesn't automatically translate 521843e1988Sjohnlev * these into PFNs, but expects them to be, so we do it here. 522843e1988Sjohnlev * We don't use mfn_to_pfn() because so many OS services have 523843e1988Sjohnlev * been disabled at this point. 524843e1988Sjohnlev */ 525843e1988Sjohnlev xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn]; 526843e1988Sjohnlev xen_info->console.domU.mfn = 527843e1988Sjohnlev mfn_to_pfn_mapping[xen_info->console.domU.mfn]; 528843e1988Sjohnlev 529843e1988Sjohnlev if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) { 530843e1988Sjohnlev prom_printf("xen_suspend_domain(): " 531843e1988Sjohnlev "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n"); 532843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 533843e1988Sjohnlev } 534843e1988Sjohnlev 535843e1988Sjohnlev if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info, 536843e1988Sjohnlev 0, UVMF_INVLPG)) { 537843e1988Sjohnlev prom_printf("xen_suspend_domain(): " 538843e1988Sjohnlev "HYPERVISOR_update_va_mapping() failed\n"); 539843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 540843e1988Sjohnlev } 541843e1988Sjohnlev 542843e1988Sjohnlev SUSPEND_DEBUG("HYPERVISOR_suspend\n"); 543843e1988Sjohnlev 544843e1988Sjohnlev /* 545843e1988Sjohnlev * At this point we suspend and sometime later resume. 546843e1988Sjohnlev */ 547843e1988Sjohnlev if (HYPERVISOR_suspend(start_info_mfn)) { 548843e1988Sjohnlev prom_printf("xen_suspend_domain(): " 549843e1988Sjohnlev "HYPERVISOR_suspend() failed\n"); 550843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 551843e1988Sjohnlev } 552843e1988Sjohnlev 553843e1988Sjohnlev /* 554843e1988Sjohnlev * Point HYPERVISOR_shared_info to its new value. 555843e1988Sjohnlev */ 556843e1988Sjohnlev if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info, 557843e1988Sjohnlev xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE, 558843e1988Sjohnlev UVMF_INVLPG)) 559843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 560843e1988Sjohnlev 561843e1988Sjohnlev if (xen_info->nr_pages != mfn_count) { 562843e1988Sjohnlev prom_printf("xen_suspend_domain(): number of pages" 563843e1988Sjohnlev " changed, was 0x%lx, now 0x%lx\n", mfn_count, 564843e1988Sjohnlev xen_info->nr_pages); 565843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 566843e1988Sjohnlev } 567843e1988Sjohnlev 568843e1988Sjohnlev xpv_time_resume(); 569843e1988Sjohnlev 570843e1988Sjohnlev cached_max_mfn = 0; 571843e1988Sjohnlev 572843e1988Sjohnlev SUSPEND_DEBUG("gnttab_resume\n"); 573843e1988Sjohnlev gnttab_resume(); 574843e1988Sjohnlev 575843e1988Sjohnlev /* XXPV: add a note that this must be lockless. */ 576843e1988Sjohnlev SUSPEND_DEBUG("ec_resume\n"); 577843e1988Sjohnlev ec_resume(); 578843e1988Sjohnlev 579843e1988Sjohnlev intr_restore(flags); 580843e1988Sjohnlev 581843e1988Sjohnlev if (ncpus > 1) 582843e1988Sjohnlev resume_cpus(); 583843e1988Sjohnlev 584843e1988Sjohnlev mutex_exit(&ec_lock); 585843e1988Sjohnlev xen_end_migrate(); 586843e1988Sjohnlev mutex_exit(&cpu_lock); 587843e1988Sjohnlev 588843e1988Sjohnlev /* 589843e1988Sjohnlev * Now we can take locks again. 590843e1988Sjohnlev */ 591843e1988Sjohnlev 592843e1988Sjohnlev /* 593843e1988Sjohnlev * Force the tick value used for tv_nsec in hres_tick() to be up to 594843e1988Sjohnlev * date. rtcsync() will reset the hrestime value appropriately. 595843e1988Sjohnlev */ 596843e1988Sjohnlev hres_last_tick = xpv_gethrtime(); 597843e1988Sjohnlev 598843e1988Sjohnlev /* 599843e1988Sjohnlev * XXPV: we need to have resumed the CPUs since this takes locks, but 600843e1988Sjohnlev * can remote CPUs see bad state? Presumably yes. Should probably nest 601843e1988Sjohnlev * taking of todlock inside of cpu_lock, or vice versa, then provide an 602843e1988Sjohnlev * unlocked version. Probably need to call clkinitf to reset cpu freq 603843e1988Sjohnlev * and re-calibrate if we migrated to a different speed cpu. Also need 604843e1988Sjohnlev * to make a (re)init_cpu_info call to update processor info structs 605843e1988Sjohnlev * and device tree info. That remains to be written at the moment. 606843e1988Sjohnlev */ 607843e1988Sjohnlev rtcsync(); 608843e1988Sjohnlev 609843e1988Sjohnlev rebuild_mfn_list(); 610843e1988Sjohnlev 611843e1988Sjohnlev SUSPEND_DEBUG("xenbus_resume\n"); 612843e1988Sjohnlev xenbus_resume(); 613843e1988Sjohnlev SUSPEND_DEBUG("xenbus_resume_devices\n"); 614843e1988Sjohnlev xen_resume_devices(); 615843e1988Sjohnlev 616843e1988Sjohnlev thread_affinity_clear(curthread); 617843e1988Sjohnlev kpreempt_enable(); 618843e1988Sjohnlev 619843e1988Sjohnlev SUSPEND_DEBUG("finished xen_suspend_domain\n"); 6209e839ce9Sgarypen 6219e839ce9Sgarypen /* 6229e839ce9Sgarypen * We have restarted our suspended domain, update the hypervisor 6239e839ce9Sgarypen * details. NB: This must be done at the end of this function, 6249e839ce9Sgarypen * since we need the domain to be completely resumed before 6259e839ce9Sgarypen * these functions will work correctly. 6269e839ce9Sgarypen */ 6279e839ce9Sgarypen xen_set_version(XENVER_CURRENT_IDX); 6289e839ce9Sgarypen 6299e839ce9Sgarypen /* 6309e839ce9Sgarypen * We can check and report a warning, but we don't stop the 6319e839ce9Sgarypen * process. 6329e839ce9Sgarypen */ 6339e839ce9Sgarypen if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) 6349e839ce9Sgarypen cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s " 6359e839ce9Sgarypen "but need at least version v3.0.4", 6369e839ce9Sgarypen XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor), 6379e839ce9Sgarypen XENVER_CURRENT(xv_ver)); 6389e839ce9Sgarypen 639843e1988Sjohnlev cmn_err(CE_NOTE, "domain restore/migrate completed"); 640843e1988Sjohnlev } 641843e1988Sjohnlev 642843e1988Sjohnlev /*ARGSUSED*/ 643843e1988Sjohnlev int 644843e1988Sjohnlev xen_debug_handler(void *arg) 645843e1988Sjohnlev { 646843e1988Sjohnlev debug_enter("External debug event received"); 647843e1988Sjohnlev 648843e1988Sjohnlev /* 649843e1988Sjohnlev * If we've not got KMDB loaded, output some stuff difficult to capture 650843e1988Sjohnlev * from a domain core. 651843e1988Sjohnlev */ 652843e1988Sjohnlev if (!(boothowto & RB_DEBUG)) { 653843e1988Sjohnlev shared_info_t *si = HYPERVISOR_shared_info; 654843e1988Sjohnlev int i; 655843e1988Sjohnlev 656843e1988Sjohnlev prom_printf("evtchn_pending [ "); 657843e1988Sjohnlev for (i = 0; i < 8; i++) 658843e1988Sjohnlev prom_printf("%lx ", si->evtchn_pending[i]); 659843e1988Sjohnlev prom_printf("]\nevtchn_mask [ "); 660843e1988Sjohnlev for (i = 0; i < 8; i++) 661843e1988Sjohnlev prom_printf("%lx ", si->evtchn_mask[i]); 662843e1988Sjohnlev prom_printf("]\n"); 663843e1988Sjohnlev 664843e1988Sjohnlev for (i = 0; i < ncpus; i++) { 665843e1988Sjohnlev vcpu_info_t *vcpu = &si->vcpu_info[i]; 666843e1988Sjohnlev if (cpu[i] == NULL) 667843e1988Sjohnlev continue; 668843e1988Sjohnlev prom_printf("CPU%d pending %d mask %d sel %lx\n", 669843e1988Sjohnlev i, vcpu->evtchn_upcall_pending, 670843e1988Sjohnlev vcpu->evtchn_upcall_mask, 671843e1988Sjohnlev vcpu->evtchn_pending_sel); 672843e1988Sjohnlev } 673843e1988Sjohnlev } 674843e1988Sjohnlev 675843e1988Sjohnlev return (0); 676843e1988Sjohnlev } 677843e1988Sjohnlev 678843e1988Sjohnlev /*ARGSUSED*/ 679843e1988Sjohnlev static void 680843e1988Sjohnlev xen_sysrq_handler(struct xenbus_watch *watch, const char **vec, 681843e1988Sjohnlev unsigned int len) 682843e1988Sjohnlev { 683843e1988Sjohnlev xenbus_transaction_t xbt; 684843e1988Sjohnlev char key = '\0'; 685843e1988Sjohnlev int ret; 686843e1988Sjohnlev 687843e1988Sjohnlev retry: 688843e1988Sjohnlev if (xenbus_transaction_start(&xbt)) { 689843e1988Sjohnlev cmn_err(CE_WARN, "failed to start sysrq transaction"); 690843e1988Sjohnlev return; 691843e1988Sjohnlev } 692843e1988Sjohnlev 693843e1988Sjohnlev if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) { 694843e1988Sjohnlev /* 695843e1988Sjohnlev * ENOENT happens in response to our own xenbus_rm. 696843e1988Sjohnlev * XXPV - this happens spuriously on boot? 697843e1988Sjohnlev */ 698843e1988Sjohnlev if (ret != ENOENT) 699843e1988Sjohnlev cmn_err(CE_WARN, "failed to read sysrq: %d", ret); 700843e1988Sjohnlev goto out; 701843e1988Sjohnlev } 702843e1988Sjohnlev 703843e1988Sjohnlev if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) { 704843e1988Sjohnlev cmn_err(CE_WARN, "failed to reset sysrq: %d", ret); 705843e1988Sjohnlev goto out; 706843e1988Sjohnlev } 707843e1988Sjohnlev 708843e1988Sjohnlev if (xenbus_transaction_end(xbt, 0) == EAGAIN) 709843e1988Sjohnlev goto retry; 710843e1988Sjohnlev 711843e1988Sjohnlev /* 712843e1988Sjohnlev * Somewhat arbitrary - on Linux this means 'reboot'. We could just 713843e1988Sjohnlev * accept any key, but this might increase the risk of sending a 714843e1988Sjohnlev * harmless sysrq to the wrong domain... 715843e1988Sjohnlev */ 716843e1988Sjohnlev if (key == 'b') 717843e1988Sjohnlev (void) xen_debug_handler(NULL); 718843e1988Sjohnlev else 719843e1988Sjohnlev cmn_err(CE_WARN, "Ignored sysrq %c", key); 720843e1988Sjohnlev return; 721843e1988Sjohnlev 722843e1988Sjohnlev out: 723843e1988Sjohnlev (void) xenbus_transaction_end(xbt, 1); 724843e1988Sjohnlev } 725843e1988Sjohnlev 726843e1988Sjohnlev taskq_t *xen_shutdown_tq; 727843e1988Sjohnlev 728843e1988Sjohnlev #define SHUTDOWN_INVALID -1 729843e1988Sjohnlev #define SHUTDOWN_POWEROFF 0 730843e1988Sjohnlev #define SHUTDOWN_REBOOT 1 731843e1988Sjohnlev #define SHUTDOWN_SUSPEND 2 732843e1988Sjohnlev #define SHUTDOWN_HALT 3 733843e1988Sjohnlev #define SHUTDOWN_MAX 4 734843e1988Sjohnlev 735843e1988Sjohnlev #define SHUTDOWN_TIMEOUT_SECS (60 * 5) 736843e1988Sjohnlev 737843e1988Sjohnlev static const char *cmd_strings[SHUTDOWN_MAX] = { 738843e1988Sjohnlev "poweroff", 739843e1988Sjohnlev "reboot", 740843e1988Sjohnlev "suspend", 741843e1988Sjohnlev "halt" 742843e1988Sjohnlev }; 743843e1988Sjohnlev 744843e1988Sjohnlev static void 745843e1988Sjohnlev xen_dirty_shutdown(void *arg) 746843e1988Sjohnlev { 747843e1988Sjohnlev int cmd = (uintptr_t)arg; 748843e1988Sjohnlev 749843e1988Sjohnlev cmn_err(CE_WARN, "Externally requested shutdown failed or " 750843e1988Sjohnlev "timed out.\nShutting down.\n"); 751843e1988Sjohnlev 752843e1988Sjohnlev switch (cmd) { 753843e1988Sjohnlev case SHUTDOWN_HALT: 754843e1988Sjohnlev case SHUTDOWN_POWEROFF: 755843e1988Sjohnlev (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred); 756843e1988Sjohnlev break; 757843e1988Sjohnlev case SHUTDOWN_REBOOT: 758843e1988Sjohnlev (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred); 759843e1988Sjohnlev break; 760843e1988Sjohnlev } 761843e1988Sjohnlev } 762843e1988Sjohnlev 763843e1988Sjohnlev static void 764843e1988Sjohnlev xen_shutdown(void *arg) 765843e1988Sjohnlev { 766843e1988Sjohnlev int cmd = (uintptr_t)arg; 767c48ac12eSjohnlev proc_t *initpp; 768843e1988Sjohnlev 769843e1988Sjohnlev ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX); 770843e1988Sjohnlev 771843e1988Sjohnlev if (cmd == SHUTDOWN_SUSPEND) { 772843e1988Sjohnlev xen_suspend_domain(); 773843e1988Sjohnlev return; 774843e1988Sjohnlev } 775843e1988Sjohnlev 776c48ac12eSjohnlev switch (cmd) { 777c48ac12eSjohnlev case SHUTDOWN_POWEROFF: 778c48ac12eSjohnlev force_shutdown_method = AD_POWEROFF; 779c48ac12eSjohnlev break; 780c48ac12eSjohnlev case SHUTDOWN_HALT: 781c48ac12eSjohnlev force_shutdown_method = AD_HALT; 782c48ac12eSjohnlev break; 783c48ac12eSjohnlev case SHUTDOWN_REBOOT: 784c48ac12eSjohnlev force_shutdown_method = AD_BOOT; 785c48ac12eSjohnlev break; 786c48ac12eSjohnlev } 787843e1988Sjohnlev 788c48ac12eSjohnlev /* 789c48ac12eSjohnlev * If we're still booting and init(1) isn't set up yet, simply halt. 790c48ac12eSjohnlev */ 791c48ac12eSjohnlev mutex_enter(&pidlock); 792c48ac12eSjohnlev initpp = prfind(P_INITPID); 793c48ac12eSjohnlev mutex_exit(&pidlock); 794c48ac12eSjohnlev if (initpp == NULL) { 795c48ac12eSjohnlev extern void halt(char *); 796c48ac12eSjohnlev halt("Power off the System"); /* just in case */ 797c48ac12eSjohnlev } 798843e1988Sjohnlev 799c48ac12eSjohnlev /* 800c48ac12eSjohnlev * else, graceful shutdown with inittab and all getting involved 801c48ac12eSjohnlev */ 802c48ac12eSjohnlev psignal(initpp, SIGPWR); 803843e1988Sjohnlev 804843e1988Sjohnlev (void) timeout(xen_dirty_shutdown, arg, 805843e1988Sjohnlev SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC)); 806843e1988Sjohnlev } 807843e1988Sjohnlev 808843e1988Sjohnlev /*ARGSUSED*/ 809843e1988Sjohnlev static void 810843e1988Sjohnlev xen_shutdown_handler(struct xenbus_watch *watch, const char **vec, 811843e1988Sjohnlev unsigned int len) 812843e1988Sjohnlev { 813843e1988Sjohnlev char *str; 814843e1988Sjohnlev xenbus_transaction_t xbt; 815843e1988Sjohnlev int err, shutdown_code = SHUTDOWN_INVALID; 816843e1988Sjohnlev unsigned int slen; 817843e1988Sjohnlev 818843e1988Sjohnlev again: 819843e1988Sjohnlev err = xenbus_transaction_start(&xbt); 820843e1988Sjohnlev if (err) 821843e1988Sjohnlev return; 822843e1988Sjohnlev if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) { 823843e1988Sjohnlev (void) xenbus_transaction_end(xbt, 1); 824843e1988Sjohnlev return; 825843e1988Sjohnlev } 826843e1988Sjohnlev 827843e1988Sjohnlev SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str); 828843e1988Sjohnlev 829843e1988Sjohnlev /* 830843e1988Sjohnlev * If this is a watch fired from our write below, check out early to 831843e1988Sjohnlev * avoid an infinite loop. 832843e1988Sjohnlev */ 833843e1988Sjohnlev if (strcmp(str, "") == 0) { 834843e1988Sjohnlev (void) xenbus_transaction_end(xbt, 0); 835843e1988Sjohnlev kmem_free(str, slen); 836843e1988Sjohnlev return; 837843e1988Sjohnlev } else if (strcmp(str, "poweroff") == 0) { 838843e1988Sjohnlev shutdown_code = SHUTDOWN_POWEROFF; 839843e1988Sjohnlev } else if (strcmp(str, "reboot") == 0) { 840843e1988Sjohnlev shutdown_code = SHUTDOWN_REBOOT; 841843e1988Sjohnlev } else if (strcmp(str, "suspend") == 0) { 842843e1988Sjohnlev shutdown_code = SHUTDOWN_SUSPEND; 843843e1988Sjohnlev } else if (strcmp(str, "halt") == 0) { 844843e1988Sjohnlev shutdown_code = SHUTDOWN_HALT; 845843e1988Sjohnlev } else { 846843e1988Sjohnlev printf("Ignoring shutdown request: %s\n", str); 847843e1988Sjohnlev } 848843e1988Sjohnlev 849843e1988Sjohnlev /* 850843e1988Sjohnlev * XXPV Should we check the value of xenbus_write() too, or are all 851843e1988Sjohnlev * errors automatically folded into xenbus_transaction_end() ?? 852843e1988Sjohnlev */ 853843e1988Sjohnlev (void) xenbus_write(xbt, "control", "shutdown", ""); 854843e1988Sjohnlev err = xenbus_transaction_end(xbt, 0); 855843e1988Sjohnlev if (err == EAGAIN) { 856843e1988Sjohnlev SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id); 857843e1988Sjohnlev kmem_free(str, slen); 858843e1988Sjohnlev goto again; 859843e1988Sjohnlev } 860843e1988Sjohnlev 861843e1988Sjohnlev kmem_free(str, slen); 862843e1988Sjohnlev if (shutdown_code != SHUTDOWN_INVALID) { 863843e1988Sjohnlev (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown, 864843e1988Sjohnlev (void *)(intptr_t)shutdown_code, 0); 865843e1988Sjohnlev } 866843e1988Sjohnlev } 867843e1988Sjohnlev 868843e1988Sjohnlev static struct xenbus_watch shutdown_watch; 869843e1988Sjohnlev static struct xenbus_watch sysrq_watch; 870843e1988Sjohnlev 871843e1988Sjohnlev void 872843e1988Sjohnlev xen_late_startup(void) 873843e1988Sjohnlev { 874843e1988Sjohnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 875843e1988Sjohnlev xen_shutdown_tq = taskq_create("shutdown_taskq", 1, 876843e1988Sjohnlev maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE); 877843e1988Sjohnlev shutdown_watch.node = "control/shutdown"; 878843e1988Sjohnlev shutdown_watch.callback = xen_shutdown_handler; 879843e1988Sjohnlev if (register_xenbus_watch(&shutdown_watch)) 880843e1988Sjohnlev cmn_err(CE_WARN, "Failed to set shutdown watcher"); 881843e1988Sjohnlev 882843e1988Sjohnlev sysrq_watch.node = "control/sysrq"; 883843e1988Sjohnlev sysrq_watch.callback = xen_sysrq_handler; 884843e1988Sjohnlev if (register_xenbus_watch(&sysrq_watch)) 885843e1988Sjohnlev cmn_err(CE_WARN, "Failed to set sysrq watcher"); 886843e1988Sjohnlev } 887843e1988Sjohnlev balloon_init(xen_info->nr_pages); 888843e1988Sjohnlev } 889843e1988Sjohnlev 890843e1988Sjohnlev #ifdef DEBUG 891843e1988Sjohnlev #define XEN_PRINTF_BUFSIZE 1024 892843e1988Sjohnlev 893843e1988Sjohnlev char xen_printf_buffer[XEN_PRINTF_BUFSIZE]; 894843e1988Sjohnlev 895843e1988Sjohnlev /* 896843e1988Sjohnlev * Printf function that calls hypervisor directly. For DomU it only 897843e1988Sjohnlev * works when running on a xen hypervisor built with debug on. Works 898843e1988Sjohnlev * always since no I/O ring interaction is needed. 899843e1988Sjohnlev */ 900843e1988Sjohnlev /*PRINTFLIKE1*/ 901843e1988Sjohnlev void 902843e1988Sjohnlev xen_printf(const char *fmt, ...) 903843e1988Sjohnlev { 904843e1988Sjohnlev va_list ap; 905843e1988Sjohnlev 906843e1988Sjohnlev va_start(ap, fmt); 907843e1988Sjohnlev (void) vsnprintf(xen_printf_buffer, XEN_PRINTF_BUFSIZE, fmt, ap); 908843e1988Sjohnlev va_end(ap); 909843e1988Sjohnlev 910843e1988Sjohnlev (void) HYPERVISOR_console_io(CONSOLEIO_write, 911843e1988Sjohnlev strlen(xen_printf_buffer), xen_printf_buffer); 912843e1988Sjohnlev } 913843e1988Sjohnlev #else 914843e1988Sjohnlev void 915843e1988Sjohnlev xen_printf(const char *fmt, ...) 916843e1988Sjohnlev { 917843e1988Sjohnlev } 918843e1988Sjohnlev #endif /* DEBUG */ 919843e1988Sjohnlev 920843e1988Sjohnlev void 921ab4a9bebSjohnlev startup_xen_version(void) 922843e1988Sjohnlev { 9239e839ce9Sgarypen xen_set_version(XENVER_BOOT_IDX); 9249e839ce9Sgarypen if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0) 9259e839ce9Sgarypen cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s " 9269e839ce9Sgarypen "but need at least version v3.0.4", 9279e839ce9Sgarypen XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor), 9289e839ce9Sgarypen XENVER_CURRENT(xv_ver)); 929ab4a9bebSjohnlev xen_pte_workaround(); 930843e1988Sjohnlev } 931843e1988Sjohnlev 932e4b86885SCheng Sean Ye int xen_mca_simulate_mc_physinfo_failure = 0; 933e4b86885SCheng Sean Ye 934e4b86885SCheng Sean Ye void 935e4b86885SCheng Sean Ye startup_xen_mca(void) 936e4b86885SCheng Sean Ye { 937e4b86885SCheng Sean Ye if (!DOMAIN_IS_INITDOMAIN(xen_info)) 938e4b86885SCheng Sean Ye return; 939e4b86885SCheng Sean Ye 940e4b86885SCheng Sean Ye xen_phys_ncpus = 0; 941e4b86885SCheng Sean Ye xen_phys_cpus = NULL; 942e4b86885SCheng Sean Ye 943e4b86885SCheng Sean Ye if (xen_mca_simulate_mc_physinfo_failure || 944e4b86885SCheng Sean Ye xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) { 945e4b86885SCheng Sean Ye cmn_err(CE_WARN, 946e4b86885SCheng Sean Ye "%sxen_get_mc_physinfo failure during xen MCA startup: " 947e4b86885SCheng Sean Ye "there will be no machine check support", 948e4b86885SCheng Sean Ye xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : ""); 949e4b86885SCheng Sean Ye return; 950e4b86885SCheng Sean Ye } 951e4b86885SCheng Sean Ye 952e4b86885SCheng Sean Ye xen_phys_cpus = kmem_alloc(xen_phys_ncpus * 953e4b86885SCheng Sean Ye sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP); 954e4b86885SCheng Sean Ye 955e4b86885SCheng Sean Ye if (xen_phys_cpus == NULL) { 956e4b86885SCheng Sean Ye cmn_err(CE_WARN, 957*349b53ddSStuart Maybee "xen_get_mc_physinfo failure: can't allocate CPU array"); 958e4b86885SCheng Sean Ye return; 959e4b86885SCheng Sean Ye } 960e4b86885SCheng Sean Ye 961e4b86885SCheng Sean Ye if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) { 962e4b86885SCheng Sean Ye cmn_err(CE_WARN, "xen_get_mc_physinfo failure: no " 963e4b86885SCheng Sean Ye "physical CPU info"); 964e4b86885SCheng Sean Ye kmem_free(xen_phys_cpus, 965e4b86885SCheng Sean Ye xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t)); 966e4b86885SCheng Sean Ye xen_phys_ncpus = 0; 967e4b86885SCheng Sean Ye xen_phys_cpus = NULL; 968e4b86885SCheng Sean Ye } 969e4b86885SCheng Sean Ye 970e4b86885SCheng Sean Ye if (xen_physinfo_debug) { 971e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp; 972e4b86885SCheng Sean Ye unsigned i; 973e4b86885SCheng Sean Ye 974e4b86885SCheng Sean Ye cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n", 975e4b86885SCheng Sean Ye xen_phys_ncpus); 976e4b86885SCheng Sean Ye for (i = 0; i < xen_phys_ncpus; i++) { 977e4b86885SCheng Sean Ye xcp = &xen_phys_cpus[i]; 978e4b86885SCheng Sean Ye cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u", 979e4b86885SCheng Sean Ye xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid, 980e4b86885SCheng Sean Ye xcp->mc_threadid, xcp->mc_apicid); 981e4b86885SCheng Sean Ye } 982e4b86885SCheng Sean Ye } 983e4b86885SCheng Sean Ye } 984e4b86885SCheng Sean Ye 985843e1988Sjohnlev /* 986843e1988Sjohnlev * Miscellaneous hypercall wrappers with slightly more verbose diagnostics. 987843e1988Sjohnlev */ 988843e1988Sjohnlev 989843e1988Sjohnlev void 990843e1988Sjohnlev xen_set_gdt(ulong_t *frame_list, int entries) 991843e1988Sjohnlev { 992843e1988Sjohnlev int err; 993843e1988Sjohnlev if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) { 994843e1988Sjohnlev /* 995843e1988Sjohnlev * X_EINVAL: reserved entry or bad frames 996843e1988Sjohnlev * X_EFAULT: bad address 997843e1988Sjohnlev */ 998843e1988Sjohnlev panic("xen_set_gdt(%p, %d): error %d", 999843e1988Sjohnlev (void *)frame_list, entries, -(int)err); 1000843e1988Sjohnlev } 1001843e1988Sjohnlev } 1002843e1988Sjohnlev 1003843e1988Sjohnlev void 1004843e1988Sjohnlev xen_set_ldt(user_desc_t *ldt, uint_t nsels) 1005843e1988Sjohnlev { 1006843e1988Sjohnlev struct mmuext_op op; 1007843e1988Sjohnlev long err; 1008843e1988Sjohnlev 1009843e1988Sjohnlev op.cmd = MMUEXT_SET_LDT; 1010843e1988Sjohnlev op.arg1.linear_addr = (uintptr_t)ldt; 1011843e1988Sjohnlev op.arg2.nr_ents = nsels; 1012843e1988Sjohnlev 1013843e1988Sjohnlev if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) { 1014843e1988Sjohnlev panic("xen_set_ldt(%p, %d): error %d", 1015843e1988Sjohnlev (void *)ldt, nsels, -(int)err); 1016843e1988Sjohnlev } 1017843e1988Sjohnlev } 1018843e1988Sjohnlev 1019843e1988Sjohnlev void 1020843e1988Sjohnlev xen_stack_switch(ulong_t ss, ulong_t esp) 1021843e1988Sjohnlev { 1022843e1988Sjohnlev long err; 1023843e1988Sjohnlev 1024843e1988Sjohnlev if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) { 1025843e1988Sjohnlev /* 1026843e1988Sjohnlev * X_EPERM: bad selector 1027843e1988Sjohnlev */ 1028843e1988Sjohnlev panic("xen_stack_switch(%lx, %lx): error %d", ss, esp, 1029843e1988Sjohnlev -(int)err); 1030843e1988Sjohnlev } 1031843e1988Sjohnlev } 1032843e1988Sjohnlev 1033843e1988Sjohnlev long 1034843e1988Sjohnlev xen_set_trap_table(trap_info_t *table) 1035843e1988Sjohnlev { 1036843e1988Sjohnlev long err; 1037843e1988Sjohnlev 1038843e1988Sjohnlev if ((err = HYPERVISOR_set_trap_table(table)) != 0) { 1039843e1988Sjohnlev /* 1040843e1988Sjohnlev * X_EFAULT: bad address 1041843e1988Sjohnlev * X_EPERM: bad selector 1042843e1988Sjohnlev */ 1043843e1988Sjohnlev panic("xen_set_trap_table(%p): error %d", (void *)table, 1044843e1988Sjohnlev -(int)err); 1045843e1988Sjohnlev } 1046843e1988Sjohnlev return (err); 1047843e1988Sjohnlev } 1048843e1988Sjohnlev 1049843e1988Sjohnlev #if defined(__amd64) 1050843e1988Sjohnlev void 1051843e1988Sjohnlev xen_set_segment_base(int reg, ulong_t value) 1052843e1988Sjohnlev { 1053843e1988Sjohnlev long err; 1054843e1988Sjohnlev 1055843e1988Sjohnlev if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) { 1056843e1988Sjohnlev /* 1057843e1988Sjohnlev * X_EFAULT: bad address 1058843e1988Sjohnlev * X_EINVAL: bad type 1059843e1988Sjohnlev */ 1060843e1988Sjohnlev panic("xen_set_segment_base(%d, %lx): error %d", 1061843e1988Sjohnlev reg, value, -(int)err); 1062843e1988Sjohnlev } 1063843e1988Sjohnlev } 1064843e1988Sjohnlev #endif /* __amd64 */ 1065843e1988Sjohnlev 1066843e1988Sjohnlev /* 1067843e1988Sjohnlev * Translate a hypervisor errcode to a Solaris error code. 1068843e1988Sjohnlev */ 1069843e1988Sjohnlev int 1070843e1988Sjohnlev xen_xlate_errcode(int error) 1071843e1988Sjohnlev { 1072843e1988Sjohnlev switch (-error) { 1073843e1988Sjohnlev 1074843e1988Sjohnlev /* 1075843e1988Sjohnlev * Translate hypervisor errno's into native errno's 1076843e1988Sjohnlev */ 1077843e1988Sjohnlev 1078843e1988Sjohnlev #define CASE(num) case X_##num: error = num; break 1079843e1988Sjohnlev 1080843e1988Sjohnlev CASE(EPERM); CASE(ENOENT); CASE(ESRCH); 1081843e1988Sjohnlev CASE(EINTR); CASE(EIO); CASE(ENXIO); 1082843e1988Sjohnlev CASE(E2BIG); CASE(ENOMEM); CASE(EACCES); 1083843e1988Sjohnlev CASE(EFAULT); CASE(EBUSY); CASE(EEXIST); 1084843e1988Sjohnlev CASE(ENODEV); CASE(EISDIR); CASE(EINVAL); 1085843e1988Sjohnlev CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); 1086843e1988Sjohnlev CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN); 1087*349b53ddSStuart Maybee CASE(ENODATA); CASE(EAGAIN); 1088843e1988Sjohnlev 1089843e1988Sjohnlev #undef CASE 1090843e1988Sjohnlev 1091843e1988Sjohnlev default: 1092843e1988Sjohnlev panic("xen_xlate_errcode: unknown error %d", error); 1093843e1988Sjohnlev } 1094843e1988Sjohnlev 1095843e1988Sjohnlev return (error); 1096843e1988Sjohnlev } 1097843e1988Sjohnlev 1098843e1988Sjohnlev /* 1099843e1988Sjohnlev * Raise PS_IOPL on current vcpu to user level. 1100843e1988Sjohnlev * Caller responsible for preventing kernel preemption. 1101843e1988Sjohnlev */ 1102843e1988Sjohnlev void 1103843e1988Sjohnlev xen_enable_user_iopl(void) 1104843e1988Sjohnlev { 1105843e1988Sjohnlev physdev_set_iopl_t set_iopl; 1106843e1988Sjohnlev set_iopl.iopl = 3; /* user ring 3 */ 1107843e1988Sjohnlev (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1108843e1988Sjohnlev } 1109843e1988Sjohnlev 1110843e1988Sjohnlev /* 1111843e1988Sjohnlev * Drop PS_IOPL on current vcpu to kernel level 1112843e1988Sjohnlev */ 1113843e1988Sjohnlev void 1114843e1988Sjohnlev xen_disable_user_iopl(void) 1115843e1988Sjohnlev { 1116843e1988Sjohnlev physdev_set_iopl_t set_iopl; 1117843e1988Sjohnlev set_iopl.iopl = 1; /* kernel pseudo ring 1 */ 1118843e1988Sjohnlev (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1119843e1988Sjohnlev } 1120843e1988Sjohnlev 1121843e1988Sjohnlev int 1122843e1988Sjohnlev xen_gdt_setprot(cpu_t *cp, uint_t prot) 1123843e1988Sjohnlev { 1124843e1988Sjohnlev int err; 1125843e1988Sjohnlev #if defined(__amd64) 1126843e1988Sjohnlev int pt_bits = PT_VALID; 1127843e1988Sjohnlev if (prot & PROT_WRITE) 1128843e1988Sjohnlev pt_bits |= PT_WRITABLE; 1129843e1988Sjohnlev #endif 1130843e1988Sjohnlev 1131843e1988Sjohnlev if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt, 1132843e1988Sjohnlev MMU_PAGESIZE, prot)) != 0) 1133843e1988Sjohnlev goto done; 1134843e1988Sjohnlev 1135843e1988Sjohnlev #if defined(__amd64) 1136843e1988Sjohnlev err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits); 1137843e1988Sjohnlev #endif 1138843e1988Sjohnlev 1139843e1988Sjohnlev done: 1140843e1988Sjohnlev if (err) { 1141843e1988Sjohnlev cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d", 1142843e1988Sjohnlev cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only", 1143843e1988Sjohnlev err); 1144843e1988Sjohnlev } 1145843e1988Sjohnlev 1146843e1988Sjohnlev return (err); 1147843e1988Sjohnlev } 1148843e1988Sjohnlev 1149843e1988Sjohnlev int 1150843e1988Sjohnlev xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot) 1151843e1988Sjohnlev { 1152843e1988Sjohnlev int err; 1153843e1988Sjohnlev caddr_t lva = (caddr_t)ldt; 1154843e1988Sjohnlev #if defined(__amd64) 1155843e1988Sjohnlev int pt_bits = PT_VALID; 1156843e1988Sjohnlev pgcnt_t npgs; 1157843e1988Sjohnlev if (prot & PROT_WRITE) 1158843e1988Sjohnlev pt_bits |= PT_WRITABLE; 1159843e1988Sjohnlev #endif /* __amd64 */ 1160843e1988Sjohnlev 1161843e1988Sjohnlev if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0) 1162843e1988Sjohnlev goto done; 1163843e1988Sjohnlev 1164843e1988Sjohnlev #if defined(__amd64) 1165843e1988Sjohnlev 1166843e1988Sjohnlev ASSERT(IS_P2ALIGNED(lsize, PAGESIZE)); 1167843e1988Sjohnlev npgs = mmu_btop(lsize); 1168843e1988Sjohnlev while (npgs--) { 1169843e1988Sjohnlev if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva), 1170843e1988Sjohnlev pt_bits)) != 0) 1171843e1988Sjohnlev break; 1172843e1988Sjohnlev lva += PAGESIZE; 1173843e1988Sjohnlev } 1174843e1988Sjohnlev #endif /* __amd64 */ 1175843e1988Sjohnlev 1176843e1988Sjohnlev done: 1177843e1988Sjohnlev if (err) { 1178843e1988Sjohnlev cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d", 1179843e1988Sjohnlev (void *)lva, 1180843e1988Sjohnlev (prot & PROT_WRITE) ? "writable" : "read-only", err); 1181843e1988Sjohnlev } 1182843e1988Sjohnlev 1183843e1988Sjohnlev return (err); 1184843e1988Sjohnlev } 1185e4b86885SCheng Sean Ye 1186e4b86885SCheng Sean Ye int 1187e4b86885SCheng Sean Ye xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus) 1188e4b86885SCheng Sean Ye { 1189e4b86885SCheng Sean Ye struct xen_mc_physcpuinfo cpi; 1190e4b86885SCheng Sean Ye 1191e4b86885SCheng Sean Ye cpi.ncpus = *ncpus; 1192e4b86885SCheng Sean Ye /*LINTED: constant in conditional context*/ 1193e4b86885SCheng Sean Ye set_xen_guest_handle(cpi.info, log_cpus); 1194e4b86885SCheng Sean Ye 1195*349b53ddSStuart Maybee if (HYPERVISOR_mca(XEN_MC_physcpuinfo, (xen_mc_arg_t *)&cpi) != 0) 1196e4b86885SCheng Sean Ye return (-1); 1197e4b86885SCheng Sean Ye 1198e4b86885SCheng Sean Ye *ncpus = cpi.ncpus; 1199e4b86885SCheng Sean Ye return (0); 1200e4b86885SCheng Sean Ye } 1201e4b86885SCheng Sean Ye 1202e4b86885SCheng Sean Ye void 1203e4b86885SCheng Sean Ye print_panic(const char *str) 1204e4b86885SCheng Sean Ye { 1205e4b86885SCheng Sean Ye xen_printf(str); 1206e4b86885SCheng Sean Ye } 1207e4b86885SCheng Sean Ye 1208e4b86885SCheng Sean Ye /* 1209e4b86885SCheng Sean Ye * Interfaces to iterate over real cpu information, but only that info 1210e4b86885SCheng Sean Ye * which we choose to expose here. These are of interest to dom0 1211e4b86885SCheng Sean Ye * only (and the backing hypercall should not work for domu). 1212e4b86885SCheng Sean Ye */ 1213e4b86885SCheng Sean Ye 1214e4b86885SCheng Sean Ye xen_mc_lcpu_cookie_t 1215e4b86885SCheng Sean Ye xen_physcpu_next(xen_mc_lcpu_cookie_t cookie) 1216e4b86885SCheng Sean Ye { 1217e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie; 1218e4b86885SCheng Sean Ye 1219e4b86885SCheng Sean Ye if (!DOMAIN_IS_INITDOMAIN(xen_info)) 1220e4b86885SCheng Sean Ye return (NULL); 1221e4b86885SCheng Sean Ye 1222e4b86885SCheng Sean Ye if (cookie == NULL) 1223e4b86885SCheng Sean Ye return ((xen_mc_lcpu_cookie_t)xen_phys_cpus); 1224e4b86885SCheng Sean Ye 1225e4b86885SCheng Sean Ye if (xcp == xen_phys_cpus + xen_phys_ncpus - 1) 1226e4b86885SCheng Sean Ye return (NULL); 1227e4b86885SCheng Sean Ye else 1228e4b86885SCheng Sean Ye return ((xen_mc_lcpu_cookie_t)++xcp); 1229e4b86885SCheng Sean Ye } 1230e4b86885SCheng Sean Ye 1231e4b86885SCheng Sean Ye #define COOKIE2XCP(c) ((xen_mc_logical_cpu_t *)(c)) 1232e4b86885SCheng Sean Ye 1233e4b86885SCheng Sean Ye const char * 1234e4b86885SCheng Sean Ye xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie) 1235e4b86885SCheng Sean Ye { 1236e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie); 1237e4b86885SCheng Sean Ye 1238e4b86885SCheng Sean Ye return ((const char *)&xcp->mc_vendorid[0]); 1239e4b86885SCheng Sean Ye } 1240e4b86885SCheng Sean Ye 1241e4b86885SCheng Sean Ye int 1242e4b86885SCheng Sean Ye xen_physcpu_family(xen_mc_lcpu_cookie_t cookie) 1243e4b86885SCheng Sean Ye { 1244e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_family); 1245e4b86885SCheng Sean Ye } 1246e4b86885SCheng Sean Ye 1247e4b86885SCheng Sean Ye int 1248e4b86885SCheng Sean Ye xen_physcpu_model(xen_mc_lcpu_cookie_t cookie) 1249e4b86885SCheng Sean Ye { 1250e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_model); 1251e4b86885SCheng Sean Ye } 1252e4b86885SCheng Sean Ye 1253e4b86885SCheng Sean Ye int 1254e4b86885SCheng Sean Ye xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie) 1255e4b86885SCheng Sean Ye { 1256e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_step); 1257e4b86885SCheng Sean Ye } 1258e4b86885SCheng Sean Ye 1259e4b86885SCheng Sean Ye id_t 1260e4b86885SCheng Sean Ye xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie) 1261e4b86885SCheng Sean Ye { 1262e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_chipid); 1263e4b86885SCheng Sean Ye } 1264e4b86885SCheng Sean Ye 1265e4b86885SCheng Sean Ye id_t 1266e4b86885SCheng Sean Ye xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie) 1267e4b86885SCheng Sean Ye { 1268e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_coreid); 1269e4b86885SCheng Sean Ye } 1270e4b86885SCheng Sean Ye 1271e4b86885SCheng Sean Ye id_t 1272e4b86885SCheng Sean Ye xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie) 1273e4b86885SCheng Sean Ye { 1274e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_threadid); 1275e4b86885SCheng Sean Ye } 1276e4b86885SCheng Sean Ye 1277e4b86885SCheng Sean Ye id_t 1278e4b86885SCheng Sean Ye xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie) 1279e4b86885SCheng Sean Ye { 1280e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_cpunr); 1281e4b86885SCheng Sean Ye } 1282e4b86885SCheng Sean Ye 1283e4b86885SCheng Sean Ye boolean_t 1284e4b86885SCheng Sean Ye xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie) 1285e4b86885SCheng Sean Ye { 1286e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_nthreads > 1); 1287e4b86885SCheng Sean Ye } 1288e4b86885SCheng Sean Ye 1289e4b86885SCheng Sean Ye uint64_t 1290e4b86885SCheng Sean Ye xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie) 1291e4b86885SCheng Sean Ye { 1292e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie); 1293e4b86885SCheng Sean Ye 1294e4b86885SCheng Sean Ye /* 1295e4b86885SCheng Sean Ye * Need to #define the indices, or search through the array. 1296e4b86885SCheng Sean Ye */ 1297e4b86885SCheng Sean Ye return (xcp->mc_msrvalues[0].value); 1298e4b86885SCheng Sean Ye } 12997eea693dSMark Johnson 13007eea693dSMark Johnson int 13017eea693dSMark Johnson xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count, 13027eea693dSMark Johnson boolean_t uvaddr) 13037eea693dSMark Johnson { 13047eea693dSMark Johnson long rc; 1305*349b53ddSStuart Maybee uint_t i; 13067eea693dSMark Johnson 13077eea693dSMark Johnson ASSERT(cmd == GNTTABOP_map_grant_ref); 13087eea693dSMark Johnson 13097eea693dSMark Johnson #if !defined(_BOOT) 1310*349b53ddSStuart Maybee if (uvaddr == B_FALSE) { 13117eea693dSMark Johnson for (i = 0; i < count; ++i) { 1312*349b53ddSStuart Maybee mapop[i].flags |= (PT_FOREIGN <<_GNTMAP_guest_avail0); 13137eea693dSMark Johnson } 13147eea693dSMark Johnson } 13157eea693dSMark Johnson #endif 13167eea693dSMark Johnson 1317*349b53ddSStuart Maybee rc = HYPERVISOR_grant_table_op(cmd, mapop, count); 1318*349b53ddSStuart Maybee 13197eea693dSMark Johnson return (rc); 13207eea693dSMark Johnson } 1321*349b53ddSStuart Maybee 1322*349b53ddSStuart Maybee static int 1323*349b53ddSStuart Maybee xpv_get_physinfo(xen_sysctl_physinfo_t *pi) 1324*349b53ddSStuart Maybee { 1325*349b53ddSStuart Maybee xen_sysctl_t op; 1326*349b53ddSStuart Maybee struct sp { void *p; } *sp = (struct sp *)&op.u.physinfo.cpu_to_node; 1327*349b53ddSStuart Maybee int ret; 1328*349b53ddSStuart Maybee 1329*349b53ddSStuart Maybee bzero(&op, sizeof (op)); 1330*349b53ddSStuart Maybee op.cmd = XEN_SYSCTL_physinfo; 1331*349b53ddSStuart Maybee op.interface_version = XEN_SYSCTL_INTERFACE_VERSION; 1332*349b53ddSStuart Maybee /*LINTED: constant in conditional context*/ 1333*349b53ddSStuart Maybee set_xen_guest_handle(*sp, NULL); 1334*349b53ddSStuart Maybee 1335*349b53ddSStuart Maybee ret = HYPERVISOR_sysctl(&op); 1336*349b53ddSStuart Maybee 1337*349b53ddSStuart Maybee if (ret != 0) 1338*349b53ddSStuart Maybee return (xen_xlate_errcode(ret)); 1339*349b53ddSStuart Maybee 1340*349b53ddSStuart Maybee bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo)); 1341*349b53ddSStuart Maybee return (0); 1342*349b53ddSStuart Maybee } 1343*349b53ddSStuart Maybee 1344*349b53ddSStuart Maybee /* 1345*349b53ddSStuart Maybee * On dom0, we can determine the number of physical cpus on the machine. 1346*349b53ddSStuart Maybee * This number is important when figuring out what workarounds are 1347*349b53ddSStuart Maybee * appropriate, so compute it now. 1348*349b53ddSStuart Maybee */ 1349*349b53ddSStuart Maybee uint_t 1350*349b53ddSStuart Maybee xpv_nr_phys_cpus(void) 1351*349b53ddSStuart Maybee { 1352*349b53ddSStuart Maybee static uint_t nphyscpus = 0; 1353*349b53ddSStuart Maybee 1354*349b53ddSStuart Maybee ASSERT(DOMAIN_IS_INITDOMAIN(xen_info)); 1355*349b53ddSStuart Maybee 1356*349b53ddSStuart Maybee if (nphyscpus == 0) { 1357*349b53ddSStuart Maybee xen_sysctl_physinfo_t pi; 1358*349b53ddSStuart Maybee int ret; 1359*349b53ddSStuart Maybee 1360*349b53ddSStuart Maybee if ((ret = xpv_get_physinfo(&pi)) != 0) 1361*349b53ddSStuart Maybee panic("xpv_get_physinfo() failed: %d\n", ret); 1362*349b53ddSStuart Maybee nphyscpus = pi.nr_cpus; 1363*349b53ddSStuart Maybee } 1364*349b53ddSStuart Maybee return (nphyscpus); 1365*349b53ddSStuart Maybee } 1366*349b53ddSStuart Maybee 1367*349b53ddSStuart Maybee pgcnt_t 1368*349b53ddSStuart Maybee xpv_nr_phys_pages(void) 1369*349b53ddSStuart Maybee { 1370*349b53ddSStuart Maybee xen_sysctl_physinfo_t pi; 1371*349b53ddSStuart Maybee int ret; 1372*349b53ddSStuart Maybee 1373*349b53ddSStuart Maybee ASSERT(DOMAIN_IS_INITDOMAIN(xen_info)); 1374*349b53ddSStuart Maybee 1375*349b53ddSStuart Maybee if ((ret = xpv_get_physinfo(&pi)) != 0) 1376*349b53ddSStuart Maybee panic("xpv_get_physinfo() failed: %d\n", ret); 1377*349b53ddSStuart Maybee 1378*349b53ddSStuart Maybee return ((pgcnt_t)pi.total_pages); 1379*349b53ddSStuart Maybee } 1380*349b53ddSStuart Maybee 1381*349b53ddSStuart Maybee uint64_t 1382*349b53ddSStuart Maybee xpv_cpu_khz(void) 1383*349b53ddSStuart Maybee { 1384*349b53ddSStuart Maybee xen_sysctl_physinfo_t pi; 1385*349b53ddSStuart Maybee int ret; 1386*349b53ddSStuart Maybee 1387*349b53ddSStuart Maybee ASSERT(DOMAIN_IS_INITDOMAIN(xen_info)); 1388*349b53ddSStuart Maybee 1389*349b53ddSStuart Maybee if ((ret = xpv_get_physinfo(&pi)) != 0) 1390*349b53ddSStuart Maybee panic("xpv_get_physinfo() failed: %d\n", ret); 1391*349b53ddSStuart Maybee return ((uint64_t)pi.cpu_khz); 1392*349b53ddSStuart Maybee } 1393