1843e1988Sjohnlev /*
2843e1988Sjohnlev * CDDL HEADER START
3843e1988Sjohnlev *
4843e1988Sjohnlev * The contents of this file are subject to the terms of the
5843e1988Sjohnlev * Common Development and Distribution License (the "License").
6843e1988Sjohnlev * You may not use this file except in compliance with the License.
7843e1988Sjohnlev *
8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev * See the License for the specific language governing permissions
11843e1988Sjohnlev * and limitations under the License.
12843e1988Sjohnlev *
13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev *
19843e1988Sjohnlev * CDDL HEADER END
20843e1988Sjohnlev */
21843e1988Sjohnlev
22843e1988Sjohnlev /*
23349b53ddSStuart Maybee * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24843e1988Sjohnlev * Use is subject to license terms.
25843e1988Sjohnlev */
26843e1988Sjohnlev
27843e1988Sjohnlev /* derived from netbsd's xen_machdep.c 1.1.2.1 */
28843e1988Sjohnlev
29843e1988Sjohnlev /*
30843e1988Sjohnlev *
31843e1988Sjohnlev * Copyright (c) 2004 Christian Limpach.
32843e1988Sjohnlev * All rights reserved.
33843e1988Sjohnlev *
34843e1988Sjohnlev * Redistribution and use in source and binary forms, with or without
35843e1988Sjohnlev * modification, are permitted provided that the following conditions
36843e1988Sjohnlev * are met:
37843e1988Sjohnlev * 1. Redistributions of source code must retain the above copyright
38843e1988Sjohnlev * notice, this list of conditions and the following disclaimer.
39843e1988Sjohnlev * 2. Redistributions in binary form must reproduce the above copyright
40843e1988Sjohnlev * notice, this list of conditions and the following disclaimer in the
41843e1988Sjohnlev * documentation and/or other materials provided with the distribution.
42843e1988Sjohnlev * 3. This section intentionally left blank.
43843e1988Sjohnlev * 4. The name of the author may not be used to endorse or promote products
44843e1988Sjohnlev * derived from this software without specific prior written permission.
45843e1988Sjohnlev *
46843e1988Sjohnlev * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
47843e1988Sjohnlev * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
48843e1988Sjohnlev * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
49843e1988Sjohnlev * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
50843e1988Sjohnlev * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51843e1988Sjohnlev * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
52843e1988Sjohnlev * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53843e1988Sjohnlev * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
54843e1988Sjohnlev * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
55843e1988Sjohnlev * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56843e1988Sjohnlev */
57843e1988Sjohnlev /*
58843e1988Sjohnlev * Section 3 of the above license was updated in response to bug 6379571.
59843e1988Sjohnlev */
60843e1988Sjohnlev
61349b53ddSStuart Maybee #include <sys/xpv_user.h>
62349b53ddSStuart Maybee
63349b53ddSStuart Maybee /* XXX 3.3. TODO remove this include */
64349b53ddSStuart Maybee #include <xen/public/arch-x86/xen-mca.h>
65349b53ddSStuart Maybee
669e839ce9Sgarypen #include <sys/ctype.h>
67843e1988Sjohnlev #include <sys/types.h>
68843e1988Sjohnlev #include <sys/cmn_err.h>
69843e1988Sjohnlev #include <sys/trap.h>
70843e1988Sjohnlev #include <sys/segments.h>
71843e1988Sjohnlev #include <sys/hypervisor.h>
72843e1988Sjohnlev #include <sys/xen_mmu.h>
73843e1988Sjohnlev #include <sys/machsystm.h>
74843e1988Sjohnlev #include <sys/promif.h>
75843e1988Sjohnlev #include <sys/bootconf.h>
76843e1988Sjohnlev #include <sys/bootinfo.h>
77843e1988Sjohnlev #include <sys/cpr.h>
78843e1988Sjohnlev #include <sys/taskq.h>
79843e1988Sjohnlev #include <sys/uadmin.h>
80843e1988Sjohnlev #include <sys/evtchn_impl.h>
81843e1988Sjohnlev #include <sys/archsystm.h>
82843e1988Sjohnlev #include <xen/sys/xenbus_impl.h>
83843e1988Sjohnlev #include <sys/mach_mmu.h>
84843e1988Sjohnlev #include <vm/hat_i86.h>
85843e1988Sjohnlev #include <sys/gnttab.h>
86843e1988Sjohnlev #include <sys/reboot.h>
87843e1988Sjohnlev #include <sys/stack.h>
88843e1988Sjohnlev #include <sys/clock.h>
89843e1988Sjohnlev #include <sys/bitmap.h>
90843e1988Sjohnlev #include <sys/processor.h>
91843e1988Sjohnlev #include <sys/xen_errno.h>
92843e1988Sjohnlev #include <sys/xpv_panic.h>
93843e1988Sjohnlev #include <sys/smp_impldefs.h>
94843e1988Sjohnlev #include <sys/cpu.h>
95843e1988Sjohnlev #include <sys/balloon_impl.h>
96843e1988Sjohnlev #include <sys/ddi.h>
97843e1988Sjohnlev
/*
 * SUSPEND_DEBUG(fmt, ...): trace output for the suspend/resume path.
 *
 * On DEBUG kernels the arguments are handed to xen_printf() when
 * xen_suspend_debug is non-zero.  On non-DEBUG kernels the macro expands
 * to nothing.
 *
 * The DEBUG form is wrapped in do { } while (0) so that it is a single
 * statement: a bare "if (...) xen_printf" would silently capture the
 * "else" of any enclosing if/else that used the macro as its body.
 */
#ifdef DEBUG
#define	SUSPEND_DEBUG(...)				\
	do {						\
		if (xen_suspend_debug)			\
			xen_printf(__VA_ARGS__);	\
	} while (0)
#else
#define	SUSPEND_DEBUG(...)
#endif
103843e1988Sjohnlev
104843e1988Sjohnlev int cpr_debug;
105843e1988Sjohnlev cpuset_t cpu_suspend_lost_set;
106843e1988Sjohnlev static int xen_suspend_debug;
107843e1988Sjohnlev
108e4b86885SCheng Sean Ye uint_t xen_phys_ncpus;
109e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xen_phys_cpus;
110e4b86885SCheng Sean Ye int xen_physinfo_debug = 0;
111e4b86885SCheng Sean Ye
1129e839ce9Sgarypen /*
1139e839ce9Sgarypen * Determine helpful version information.
1149e839ce9Sgarypen *
1159e839ce9Sgarypen * (And leave copies in the data segment so we can look at them later
1169e839ce9Sgarypen * with e.g. kmdb.)
1179e839ce9Sgarypen */
1189e839ce9Sgarypen
/*
 * Index into xenver[]: the hypervisor version recorded at boot, and the
 * one recorded most recently.
 */
typedef enum xen_version {
	XENVER_BOOT_IDX = 0,
	XENVER_CURRENT_IDX = 1
} xen_version_t;
1239e839ce9Sgarypen
1249e839ce9Sgarypen struct xenver {
1259e839ce9Sgarypen ulong_t xv_major;
1269e839ce9Sgarypen ulong_t xv_minor;
1279e839ce9Sgarypen ulong_t xv_revision;
1289e839ce9Sgarypen xen_extraversion_t xv_ver;
129ab4a9bebSjohnlev ulong_t xv_is_xvm;
1309e839ce9Sgarypen xen_changeset_info_t xv_chgset;
1319e839ce9Sgarypen xen_compile_info_t xv_build;
1329e839ce9Sgarypen xen_capabilities_info_t xv_caps;
1339e839ce9Sgarypen } xenver[2];
1349e839ce9Sgarypen
1359e839ce9Sgarypen #define XENVER_BOOT(m) (xenver[XENVER_BOOT_IDX].m)
1369e839ce9Sgarypen #define XENVER_CURRENT(m) (xenver[XENVER_CURRENT_IDX].m)
1379e839ce9Sgarypen
1389e839ce9Sgarypen /*
1399e839ce9Sgarypen * Update the xenver data. We maintain two copies, boot and
1409e839ce9Sgarypen * current. If we are setting the boot, then also set current.
1419e839ce9Sgarypen */
1429e839ce9Sgarypen static void
xen_set_version(xen_version_t idx)1439e839ce9Sgarypen xen_set_version(xen_version_t idx)
1449e839ce9Sgarypen {
1459e839ce9Sgarypen ulong_t ver;
1469e839ce9Sgarypen
1479e839ce9Sgarypen bzero(&xenver[idx], sizeof (xenver[idx]));
1489e839ce9Sgarypen
1499e839ce9Sgarypen ver = HYPERVISOR_xen_version(XENVER_version, 0);
1509e839ce9Sgarypen
1519e839ce9Sgarypen xenver[idx].xv_major = BITX(ver, 31, 16);
1529e839ce9Sgarypen xenver[idx].xv_minor = BITX(ver, 15, 0);
1539e839ce9Sgarypen
1549e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver);
1559e839ce9Sgarypen
1569e839ce9Sgarypen /*
1579e839ce9Sgarypen * The revision is buried in the extraversion information that is
1589e839ce9Sgarypen * maintained by the hypervisor. For our purposes we expect that
1599e839ce9Sgarypen * the revision number is:
1609e839ce9Sgarypen * - the second character in the extraversion information
1619e839ce9Sgarypen * - one character long
1629e839ce9Sgarypen * - numeric digit
1639e839ce9Sgarypen * If it isn't then we can't extract the revision and we leave it
1649e839ce9Sgarypen * set to 0.
1659e839ce9Sgarypen */
1669e839ce9Sgarypen if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1]))
1679e839ce9Sgarypen xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0';
1689e839ce9Sgarypen else
1699e839ce9Sgarypen cmn_err(CE_WARN, "Cannot extract revision on this hypervisor "
1709e839ce9Sgarypen "version: v%s, unexpected version format",
1719e839ce9Sgarypen xenver[idx].xv_ver);
1729e839ce9Sgarypen
173ab4a9bebSjohnlev xenver[idx].xv_is_xvm = 0;
174ab4a9bebSjohnlev
175*ad09f8b8SMark Johnson if (strstr(xenver[idx].xv_ver, "-xvm") != NULL)
176ab4a9bebSjohnlev xenver[idx].xv_is_xvm = 1;
177ab4a9bebSjohnlev
1789e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_changeset,
1799e839ce9Sgarypen &xenver[idx].xv_chgset);
1809e839ce9Sgarypen
1819e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_compile_info,
1829e839ce9Sgarypen &xenver[idx].xv_build);
1839e839ce9Sgarypen /*
1849e839ce9Sgarypen * Capabilities are a set of space separated ascii strings
1859e839ce9Sgarypen * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'
1869e839ce9Sgarypen */
1879e839ce9Sgarypen (void) HYPERVISOR_xen_version(XENVER_capabilities,
1889e839ce9Sgarypen &xenver[idx].xv_caps);
1899e839ce9Sgarypen
1909e839ce9Sgarypen cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major,
1919e839ce9Sgarypen xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset);
1929e839ce9Sgarypen
1939e839ce9Sgarypen if (idx == XENVER_BOOT_IDX)
1949e839ce9Sgarypen bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX],
1959e839ce9Sgarypen sizeof (xenver[XENVER_BOOT_IDX]));
1969e839ce9Sgarypen }
1979e839ce9Sgarypen
/*
 * Which requirement xen_hypervisor_supports_solaris() should test:
 * being able to run at all, or being able to suspend/resume.
 */
typedef enum xen_hypervisor_check {
	XEN_RUN_CHECK = 0,
	XEN_SUSPEND_CHECK = 1
} xen_hypervisor_check_t;
2029e839ce9Sgarypen
2039e839ce9Sgarypen /*
2049e839ce9Sgarypen * To run the hypervisor must be 3.0.4 or better. To suspend/resume
2059e839ce9Sgarypen * we need 3.0.4 or better and if it is 3.0.4. then it must be provided
2069e839ce9Sgarypen * by the Solaris xVM project.
2079e839ce9Sgarypen * Checking can be disabled for testing purposes by setting the
2089e839ce9Sgarypen * xen_suspend_debug variable.
2099e839ce9Sgarypen */
2109e839ce9Sgarypen static int
xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)2119e839ce9Sgarypen xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)
2129e839ce9Sgarypen {
2139e839ce9Sgarypen if (xen_suspend_debug == 1)
2149e839ce9Sgarypen return (1);
2159e839ce9Sgarypen if (XENVER_CURRENT(xv_major) < 3)
2169e839ce9Sgarypen return (0);
2179e839ce9Sgarypen if (XENVER_CURRENT(xv_major) > 3)
2189e839ce9Sgarypen return (1);
2199e839ce9Sgarypen if (XENVER_CURRENT(xv_minor) > 0)
2209e839ce9Sgarypen return (1);
2219e839ce9Sgarypen if (XENVER_CURRENT(xv_revision) < 4)
2229e839ce9Sgarypen return (0);
223ab4a9bebSjohnlev if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 &&
224ab4a9bebSjohnlev !XENVER_CURRENT(xv_is_xvm))
2259e839ce9Sgarypen return (0);
226ab4a9bebSjohnlev
2279e839ce9Sgarypen return (1);
2289e839ce9Sgarypen }
2299e839ce9Sgarypen
230ab4a9bebSjohnlev /*
231ab4a9bebSjohnlev * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
232ab4a9bebSjohnlev * workaround.
233ab4a9bebSjohnlev */
234ab4a9bebSjohnlev static void
xen_pte_workaround(void)235ab4a9bebSjohnlev xen_pte_workaround(void)
236ab4a9bebSjohnlev {
237ab4a9bebSjohnlev #if defined(__amd64)
238ab4a9bebSjohnlev extern int pt_kern;
239ab4a9bebSjohnlev
240ab4a9bebSjohnlev if (XENVER_CURRENT(xv_major) != 3)
241ab4a9bebSjohnlev return;
242ab4a9bebSjohnlev if (XENVER_CURRENT(xv_minor) > 1)
243ab4a9bebSjohnlev return;
244ab4a9bebSjohnlev if (XENVER_CURRENT(xv_minor) == 1 &&
245ab4a9bebSjohnlev XENVER_CURRENT(xv_revision) > 1)
246ab4a9bebSjohnlev return;
247ab4a9bebSjohnlev if (XENVER_CURRENT(xv_is_xvm))
248ab4a9bebSjohnlev return;
249ab4a9bebSjohnlev
250ab4a9bebSjohnlev pt_kern = PT_USER;
251ab4a9bebSjohnlev #endif
252ab4a9bebSjohnlev }
253ab4a9bebSjohnlev
254843e1988Sjohnlev void
xen_set_callback(void (* func)(void),uint_t type,uint_t flags)255843e1988Sjohnlev xen_set_callback(void (*func)(void), uint_t type, uint_t flags)
256843e1988Sjohnlev {
257843e1988Sjohnlev struct callback_register cb;
258843e1988Sjohnlev
259843e1988Sjohnlev bzero(&cb, sizeof (cb));
260843e1988Sjohnlev #if defined(__amd64)
261843e1988Sjohnlev cb.address = (ulong_t)func;
262843e1988Sjohnlev #elif defined(__i386)
263843e1988Sjohnlev cb.address.cs = KCS_SEL;
264843e1988Sjohnlev cb.address.eip = (ulong_t)func;
265843e1988Sjohnlev #endif
266843e1988Sjohnlev cb.type = type;
267843e1988Sjohnlev cb.flags = flags;
268843e1988Sjohnlev
269843e1988Sjohnlev /*
270843e1988Sjohnlev * XXPV always ignore return value for NMI
271843e1988Sjohnlev */
272843e1988Sjohnlev if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 &&
273843e1988Sjohnlev type != CALLBACKTYPE_nmi)
274843e1988Sjohnlev panic("HYPERVISOR_callback_op failed");
275843e1988Sjohnlev }
276843e1988Sjohnlev
277843e1988Sjohnlev void
xen_init_callbacks(void)278843e1988Sjohnlev xen_init_callbacks(void)
279843e1988Sjohnlev {
280843e1988Sjohnlev /*
281843e1988Sjohnlev * register event (interrupt) handler.
282843e1988Sjohnlev */
283843e1988Sjohnlev xen_set_callback(xen_callback, CALLBACKTYPE_event, 0);
284843e1988Sjohnlev
285843e1988Sjohnlev /*
286843e1988Sjohnlev * failsafe handler.
287843e1988Sjohnlev */
288843e1988Sjohnlev xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe,
289843e1988Sjohnlev CALLBACKF_mask_events);
290843e1988Sjohnlev
291843e1988Sjohnlev /*
292843e1988Sjohnlev * NMI handler.
293843e1988Sjohnlev */
294843e1988Sjohnlev xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0);
295843e1988Sjohnlev
296843e1988Sjohnlev /*
297843e1988Sjohnlev * system call handler
298843e1988Sjohnlev * XXPV move to init_cpu_syscall?
299843e1988Sjohnlev */
300843e1988Sjohnlev #if defined(__amd64)
301843e1988Sjohnlev xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
302843e1988Sjohnlev CALLBACKF_mask_events);
303843e1988Sjohnlev #endif /* __amd64 */
304843e1988Sjohnlev }
305843e1988Sjohnlev
306843e1988Sjohnlev
307843e1988Sjohnlev /*
308843e1988Sjohnlev * cmn_err() followed by a 1/4 second delay; this gives the
309843e1988Sjohnlev * logging service a chance to flush messages and helps avoid
310843e1988Sjohnlev * intermixing output from prom_printf().
311843e1988Sjohnlev * XXPV: doesn't exactly help us on UP though.
312843e1988Sjohnlev */
313843e1988Sjohnlev /*PRINTFLIKE2*/
314843e1988Sjohnlev void
cpr_err(int ce,const char * fmt,...)315843e1988Sjohnlev cpr_err(int ce, const char *fmt, ...)
316843e1988Sjohnlev {
317843e1988Sjohnlev va_list adx;
318843e1988Sjohnlev
319843e1988Sjohnlev va_start(adx, fmt);
320843e1988Sjohnlev vcmn_err(ce, fmt, adx);
321843e1988Sjohnlev va_end(adx);
322843e1988Sjohnlev drv_usecwait(MICROSEC >> 2);
323843e1988Sjohnlev }
324843e1988Sjohnlev
/*
 * Suspend every device below the root of the device tree.  Panics if
 * cpr_suspend_devices() reports failure.
 */
void
xen_suspend_devices(void)
{
	int err;

	SUSPEND_DEBUG("xen_suspend_devices\n");

	err = cpr_suspend_devices(ddi_root_node());
	if (err != 0)
		panic("failed to suspend devices: %d", err);
}
335843e1988Sjohnlev
/*
 * Resume every device below the root of the device tree.  Panics if
 * cpr_resume_devices() reports failure.
 */
void
xen_resume_devices(void)
{
	int err;

	SUSPEND_DEBUG("xen_resume_devices\n");

	err = cpr_resume_devices(ddi_root_node(), 0);
	if (err != 0)
		panic("failed to resume devices: %d", err);
}
346843e1988Sjohnlev
347843e1988Sjohnlev /*
348843e1988Sjohnlev * The list of mfn pages is out of date. Recompute it.
349843e1988Sjohnlev */
350843e1988Sjohnlev static void
rebuild_mfn_list(void)351843e1988Sjohnlev rebuild_mfn_list(void)
352843e1988Sjohnlev {
353843e1988Sjohnlev int i = 0;
354843e1988Sjohnlev size_t sz;
355843e1988Sjohnlev size_t off;
356843e1988Sjohnlev pfn_t pfn;
357843e1988Sjohnlev
358843e1988Sjohnlev SUSPEND_DEBUG("rebuild_mfn_list\n");
359843e1988Sjohnlev
360843e1988Sjohnlev sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;
361843e1988Sjohnlev
362843e1988Sjohnlev for (off = 0; off < sz; off += MMU_PAGESIZE) {
363843e1988Sjohnlev size_t j = mmu_btop(off);
364843e1988Sjohnlev if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
365843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat,
366843e1988Sjohnlev (caddr_t)&mfn_list_pages[j]);
367843e1988Sjohnlev mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
368843e1988Sjohnlev }
369843e1988Sjohnlev
370843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
371843e1988Sjohnlev mfn_list_pages[j] = pfn_to_mfn(pfn);
372843e1988Sjohnlev }
373843e1988Sjohnlev
374843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
375843e1988Sjohnlev HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
376843e1988Sjohnlev = pfn_to_mfn(pfn);
377843e1988Sjohnlev }
378843e1988Sjohnlev
379843e1988Sjohnlev static void
suspend_cpus(void)380843e1988Sjohnlev suspend_cpus(void)
381843e1988Sjohnlev {
382843e1988Sjohnlev int i;
383843e1988Sjohnlev
384843e1988Sjohnlev SUSPEND_DEBUG("suspend_cpus\n");
385843e1988Sjohnlev
3861d03c31eSjohnlev mp_enter_barrier();
387843e1988Sjohnlev
388843e1988Sjohnlev for (i = 1; i < ncpus; i++) {
389843e1988Sjohnlev if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
390843e1988Sjohnlev SUSPEND_DEBUG("xen_vcpu_down %d\n", i);
391843e1988Sjohnlev (void) xen_vcpu_down(i);
392843e1988Sjohnlev }
393843e1988Sjohnlev
394843e1988Sjohnlev mach_cpucontext_reset(cpu[i]);
395843e1988Sjohnlev }
396843e1988Sjohnlev }
397843e1988Sjohnlev
398843e1988Sjohnlev static void
resume_cpus(void)399843e1988Sjohnlev resume_cpus(void)
400843e1988Sjohnlev {
401843e1988Sjohnlev int i;
402843e1988Sjohnlev
403843e1988Sjohnlev for (i = 1; i < ncpus; i++) {
404843e1988Sjohnlev if (cpu[i] == NULL)
405843e1988Sjohnlev continue;
406843e1988Sjohnlev
407843e1988Sjohnlev if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
408843e1988Sjohnlev SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
409843e1988Sjohnlev mach_cpucontext_restore(cpu[i]);
410843e1988Sjohnlev (void) xen_vcpu_up(i);
411843e1988Sjohnlev }
412843e1988Sjohnlev }
413843e1988Sjohnlev
4141d03c31eSjohnlev mp_leave_barrier();
415843e1988Sjohnlev }
416843e1988Sjohnlev
417843e1988Sjohnlev /*
418843e1988Sjohnlev * Top level routine to direct suspend/resume of a domain.
419843e1988Sjohnlev */
420843e1988Sjohnlev void
xen_suspend_domain(void)421843e1988Sjohnlev xen_suspend_domain(void)
422843e1988Sjohnlev {
423843e1988Sjohnlev extern void rtcsync(void);
424843e1988Sjohnlev extern hrtime_t hres_last_tick;
425843e1988Sjohnlev mfn_t start_info_mfn;
426843e1988Sjohnlev ulong_t flags;
427843e1988Sjohnlev pfn_t pfn;
428843e1988Sjohnlev int i;
429843e1988Sjohnlev
430843e1988Sjohnlev /*
4319e839ce9Sgarypen * Check that we are happy to suspend on this hypervisor.
4329e839ce9Sgarypen */
4339e839ce9Sgarypen if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
4349e839ce9Sgarypen cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
4359e839ce9Sgarypen "version: v%lu.%lu%s, need at least version v3.0.4 or "
4369e839ce9Sgarypen "-xvm based hypervisor", XENVER_CURRENT(xv_major),
4379e839ce9Sgarypen XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
4389e839ce9Sgarypen return;
4399e839ce9Sgarypen }
4409e839ce9Sgarypen
4419e839ce9Sgarypen /*
442843e1988Sjohnlev * XXPV - Are we definitely OK to suspend by the time we've connected
443843e1988Sjohnlev * the handler?
444843e1988Sjohnlev */
445843e1988Sjohnlev
446843e1988Sjohnlev cpr_err(CE_NOTE, "Domain suspending for save/migrate");
447843e1988Sjohnlev
448843e1988Sjohnlev SUSPEND_DEBUG("xen_suspend_domain\n");
449843e1988Sjohnlev
450843e1988Sjohnlev /*
451843e1988Sjohnlev * suspend interrupts and devices
452843e1988Sjohnlev * XXPV - we use suspend/resume for both save/restore domains (like sun
453843e1988Sjohnlev * cpr) and for migration. Would be nice to know the difference if
454843e1988Sjohnlev * possible. For save/restore where down time may be a long time, we
455843e1988Sjohnlev * may want to do more of the things that cpr does. (i.e. notify user
456843e1988Sjohnlev * processes, shrink memory footprint for faster restore, etc.)
457843e1988Sjohnlev */
458843e1988Sjohnlev xen_suspend_devices();
459843e1988Sjohnlev SUSPEND_DEBUG("xenbus_suspend\n");
460843e1988Sjohnlev xenbus_suspend();
461843e1988Sjohnlev
462843e1988Sjohnlev pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
463843e1988Sjohnlev start_info_mfn = pfn_to_mfn(pfn);
464843e1988Sjohnlev
465843e1988Sjohnlev /*
466843e1988Sjohnlev * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
467843e1988Sjohnlev * wrt xenbus being suspended here?
468843e1988Sjohnlev */
469843e1988Sjohnlev mutex_enter(&cpu_lock);
470843e1988Sjohnlev
471843e1988Sjohnlev /*
472843e1988Sjohnlev * Suspend must be done on vcpu 0, as no context for other CPUs is
473843e1988Sjohnlev * saved.
474843e1988Sjohnlev *
475843e1988Sjohnlev * XXPV - add to taskq API ?
476843e1988Sjohnlev */
477843e1988Sjohnlev thread_affinity_set(curthread, 0);
478843e1988Sjohnlev kpreempt_disable();
479843e1988Sjohnlev
480843e1988Sjohnlev SUSPEND_DEBUG("xen_start_migrate\n");
481843e1988Sjohnlev xen_start_migrate();
482843e1988Sjohnlev if (ncpus > 1)
483843e1988Sjohnlev suspend_cpus();
484843e1988Sjohnlev
485843e1988Sjohnlev /*
486843e1988Sjohnlev * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
487843e1988Sjohnlev * any holder would have dropped it to get through suspend_cpus().
488843e1988Sjohnlev */
489843e1988Sjohnlev mutex_enter(&ec_lock);
490843e1988Sjohnlev
491843e1988Sjohnlev /*
492843e1988Sjohnlev * From here on in, we can't take locks.
493843e1988Sjohnlev */
494843e1988Sjohnlev SUSPEND_DEBUG("ec_suspend\n");
495843e1988Sjohnlev ec_suspend();
496843e1988Sjohnlev SUSPEND_DEBUG("gnttab_suspend\n");
497843e1988Sjohnlev gnttab_suspend();
498843e1988Sjohnlev
499843e1988Sjohnlev flags = intr_clear();
500843e1988Sjohnlev
501843e1988Sjohnlev xpv_time_suspend();
502843e1988Sjohnlev
503843e1988Sjohnlev /*
504843e1988Sjohnlev * Currently, the hypervisor incorrectly fails to bring back
505843e1988Sjohnlev * powered-down VCPUs. Thus we need to record any powered-down VCPUs
506843e1988Sjohnlev * to prevent any attempts to operate on them. But we have to do this
507843e1988Sjohnlev * *after* the very first time we do ec_suspend().
508843e1988Sjohnlev */
509843e1988Sjohnlev for (i = 1; i < ncpus; i++) {
510843e1988Sjohnlev if (cpu[i] == NULL)
511843e1988Sjohnlev continue;
512843e1988Sjohnlev
513843e1988Sjohnlev if (cpu_get_state(cpu[i]) == P_POWEROFF)
514843e1988Sjohnlev CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
515843e1988Sjohnlev }
516843e1988Sjohnlev
517843e1988Sjohnlev /*
518843e1988Sjohnlev * The dom0 save/migrate code doesn't automatically translate
519843e1988Sjohnlev * these into PFNs, but expects them to be, so we do it here.
520843e1988Sjohnlev * We don't use mfn_to_pfn() because so many OS services have
521843e1988Sjohnlev * been disabled at this point.
522843e1988Sjohnlev */
523843e1988Sjohnlev xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
524843e1988Sjohnlev xen_info->console.domU.mfn =
525843e1988Sjohnlev mfn_to_pfn_mapping[xen_info->console.domU.mfn];
526843e1988Sjohnlev
527843e1988Sjohnlev if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
528843e1988Sjohnlev prom_printf("xen_suspend_domain(): "
529843e1988Sjohnlev "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
530843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
531843e1988Sjohnlev }
532843e1988Sjohnlev
533843e1988Sjohnlev if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
534843e1988Sjohnlev 0, UVMF_INVLPG)) {
535843e1988Sjohnlev prom_printf("xen_suspend_domain(): "
536843e1988Sjohnlev "HYPERVISOR_update_va_mapping() failed\n");
537843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
538843e1988Sjohnlev }
539843e1988Sjohnlev
540843e1988Sjohnlev SUSPEND_DEBUG("HYPERVISOR_suspend\n");
541843e1988Sjohnlev
542843e1988Sjohnlev /*
543843e1988Sjohnlev * At this point we suspend and sometime later resume.
544843e1988Sjohnlev */
545843e1988Sjohnlev if (HYPERVISOR_suspend(start_info_mfn)) {
546843e1988Sjohnlev prom_printf("xen_suspend_domain(): "
547843e1988Sjohnlev "HYPERVISOR_suspend() failed\n");
548843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
549843e1988Sjohnlev }
550843e1988Sjohnlev
551843e1988Sjohnlev /*
552843e1988Sjohnlev * Point HYPERVISOR_shared_info to its new value.
553843e1988Sjohnlev */
554843e1988Sjohnlev if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
555843e1988Sjohnlev xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
556843e1988Sjohnlev UVMF_INVLPG))
557843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
558843e1988Sjohnlev
559843e1988Sjohnlev if (xen_info->nr_pages != mfn_count) {
560843e1988Sjohnlev prom_printf("xen_suspend_domain(): number of pages"
561843e1988Sjohnlev " changed, was 0x%lx, now 0x%lx\n", mfn_count,
562843e1988Sjohnlev xen_info->nr_pages);
563843e1988Sjohnlev (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
564843e1988Sjohnlev }
565843e1988Sjohnlev
566843e1988Sjohnlev xpv_time_resume();
567843e1988Sjohnlev
568843e1988Sjohnlev cached_max_mfn = 0;
569843e1988Sjohnlev
570843e1988Sjohnlev SUSPEND_DEBUG("gnttab_resume\n");
571843e1988Sjohnlev gnttab_resume();
572843e1988Sjohnlev
573843e1988Sjohnlev /* XXPV: add a note that this must be lockless. */
574843e1988Sjohnlev SUSPEND_DEBUG("ec_resume\n");
575843e1988Sjohnlev ec_resume();
576843e1988Sjohnlev
577843e1988Sjohnlev intr_restore(flags);
578843e1988Sjohnlev
579843e1988Sjohnlev if (ncpus > 1)
580843e1988Sjohnlev resume_cpus();
581843e1988Sjohnlev
582843e1988Sjohnlev mutex_exit(&ec_lock);
583843e1988Sjohnlev xen_end_migrate();
584843e1988Sjohnlev mutex_exit(&cpu_lock);
585843e1988Sjohnlev
586843e1988Sjohnlev /*
587843e1988Sjohnlev * Now we can take locks again.
588843e1988Sjohnlev */
589843e1988Sjohnlev
590843e1988Sjohnlev /*
591843e1988Sjohnlev * Force the tick value used for tv_nsec in hres_tick() to be up to
592843e1988Sjohnlev * date. rtcsync() will reset the hrestime value appropriately.
593843e1988Sjohnlev */
594843e1988Sjohnlev hres_last_tick = xpv_gethrtime();
595843e1988Sjohnlev
596843e1988Sjohnlev /*
597843e1988Sjohnlev * XXPV: we need to have resumed the CPUs since this takes locks, but
598843e1988Sjohnlev * can remote CPUs see bad state? Presumably yes. Should probably nest
599843e1988Sjohnlev * taking of todlock inside of cpu_lock, or vice versa, then provide an
600843e1988Sjohnlev * unlocked version. Probably need to call clkinitf to reset cpu freq
601843e1988Sjohnlev * and re-calibrate if we migrated to a different speed cpu. Also need
602843e1988Sjohnlev * to make a (re)init_cpu_info call to update processor info structs
603843e1988Sjohnlev * and device tree info. That remains to be written at the moment.
604843e1988Sjohnlev */
605843e1988Sjohnlev rtcsync();
606843e1988Sjohnlev
607843e1988Sjohnlev rebuild_mfn_list();
608843e1988Sjohnlev
609843e1988Sjohnlev SUSPEND_DEBUG("xenbus_resume\n");
610843e1988Sjohnlev xenbus_resume();
611843e1988Sjohnlev SUSPEND_DEBUG("xenbus_resume_devices\n");
612843e1988Sjohnlev xen_resume_devices();
613843e1988Sjohnlev
614843e1988Sjohnlev thread_affinity_clear(curthread);
615843e1988Sjohnlev kpreempt_enable();
616843e1988Sjohnlev
617843e1988Sjohnlev SUSPEND_DEBUG("finished xen_suspend_domain\n");
6189e839ce9Sgarypen
6199e839ce9Sgarypen /*
6209e839ce9Sgarypen * We have restarted our suspended domain, update the hypervisor
6219e839ce9Sgarypen * details. NB: This must be done at the end of this function,
6229e839ce9Sgarypen * since we need the domain to be completely resumed before
6239e839ce9Sgarypen * these functions will work correctly.
6249e839ce9Sgarypen */
6259e839ce9Sgarypen xen_set_version(XENVER_CURRENT_IDX);
6269e839ce9Sgarypen
6279e839ce9Sgarypen /*
6289e839ce9Sgarypen * We can check and report a warning, but we don't stop the
6299e839ce9Sgarypen * process.
6309e839ce9Sgarypen */
6319e839ce9Sgarypen if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
6329e839ce9Sgarypen cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
6339e839ce9Sgarypen "but need at least version v3.0.4",
6349e839ce9Sgarypen XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
6359e839ce9Sgarypen XENVER_CURRENT(xv_ver));
6369e839ce9Sgarypen
637843e1988Sjohnlev cmn_err(CE_NOTE, "domain restore/migrate completed");
638843e1988Sjohnlev }
639843e1988Sjohnlev
640843e1988Sjohnlev /*ARGSUSED*/
641843e1988Sjohnlev int
xen_debug_handler(void * arg)642843e1988Sjohnlev xen_debug_handler(void *arg)
643843e1988Sjohnlev {
644843e1988Sjohnlev debug_enter("External debug event received");
645843e1988Sjohnlev
646843e1988Sjohnlev /*
647843e1988Sjohnlev * If we've not got KMDB loaded, output some stuff difficult to capture
648843e1988Sjohnlev * from a domain core.
649843e1988Sjohnlev */
650843e1988Sjohnlev if (!(boothowto & RB_DEBUG)) {
651843e1988Sjohnlev shared_info_t *si = HYPERVISOR_shared_info;
652843e1988Sjohnlev int i;
653843e1988Sjohnlev
654843e1988Sjohnlev prom_printf("evtchn_pending [ ");
655843e1988Sjohnlev for (i = 0; i < 8; i++)
656843e1988Sjohnlev prom_printf("%lx ", si->evtchn_pending[i]);
657843e1988Sjohnlev prom_printf("]\nevtchn_mask [ ");
658843e1988Sjohnlev for (i = 0; i < 8; i++)
659843e1988Sjohnlev prom_printf("%lx ", si->evtchn_mask[i]);
660843e1988Sjohnlev prom_printf("]\n");
661843e1988Sjohnlev
662843e1988Sjohnlev for (i = 0; i < ncpus; i++) {
663843e1988Sjohnlev vcpu_info_t *vcpu = &si->vcpu_info[i];
664843e1988Sjohnlev if (cpu[i] == NULL)
665843e1988Sjohnlev continue;
666843e1988Sjohnlev prom_printf("CPU%d pending %d mask %d sel %lx\n",
667843e1988Sjohnlev i, vcpu->evtchn_upcall_pending,
668843e1988Sjohnlev vcpu->evtchn_upcall_mask,
669843e1988Sjohnlev vcpu->evtchn_pending_sel);
670843e1988Sjohnlev }
671843e1988Sjohnlev }
672843e1988Sjohnlev
673843e1988Sjohnlev return (0);
674843e1988Sjohnlev }
675843e1988Sjohnlev
676843e1988Sjohnlev /*ARGSUSED*/
677843e1988Sjohnlev static void
xen_sysrq_handler(struct xenbus_watch * watch,const char ** vec,unsigned int len)678843e1988Sjohnlev xen_sysrq_handler(struct xenbus_watch *watch, const char **vec,
679843e1988Sjohnlev unsigned int len)
680843e1988Sjohnlev {
681843e1988Sjohnlev xenbus_transaction_t xbt;
682843e1988Sjohnlev char key = '\0';
683843e1988Sjohnlev int ret;
684843e1988Sjohnlev
685843e1988Sjohnlev retry:
686843e1988Sjohnlev if (xenbus_transaction_start(&xbt)) {
687843e1988Sjohnlev cmn_err(CE_WARN, "failed to start sysrq transaction");
688843e1988Sjohnlev return;
689843e1988Sjohnlev }
690843e1988Sjohnlev
691843e1988Sjohnlev if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) {
692843e1988Sjohnlev /*
693843e1988Sjohnlev * ENOENT happens in response to our own xenbus_rm.
694843e1988Sjohnlev * XXPV - this happens spuriously on boot?
695843e1988Sjohnlev */
696843e1988Sjohnlev if (ret != ENOENT)
697843e1988Sjohnlev cmn_err(CE_WARN, "failed to read sysrq: %d", ret);
698843e1988Sjohnlev goto out;
699843e1988Sjohnlev }
700843e1988Sjohnlev
701843e1988Sjohnlev if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) {
702843e1988Sjohnlev cmn_err(CE_WARN, "failed to reset sysrq: %d", ret);
703843e1988Sjohnlev goto out;
704843e1988Sjohnlev }
705843e1988Sjohnlev
706843e1988Sjohnlev if (xenbus_transaction_end(xbt, 0) == EAGAIN)
707843e1988Sjohnlev goto retry;
708843e1988Sjohnlev
709843e1988Sjohnlev /*
710843e1988Sjohnlev * Somewhat arbitrary - on Linux this means 'reboot'. We could just
711843e1988Sjohnlev * accept any key, but this might increase the risk of sending a
712843e1988Sjohnlev * harmless sysrq to the wrong domain...
713843e1988Sjohnlev */
714843e1988Sjohnlev if (key == 'b')
715843e1988Sjohnlev (void) xen_debug_handler(NULL);
716843e1988Sjohnlev else
717843e1988Sjohnlev cmn_err(CE_WARN, "Ignored sysrq %c", key);
718843e1988Sjohnlev return;
719843e1988Sjohnlev
720843e1988Sjohnlev out:
721843e1988Sjohnlev (void) xenbus_transaction_end(xbt, 1);
722843e1988Sjohnlev }
723843e1988Sjohnlev
/*
 * Task queue on which xen_shutdown_handler() dispatches xen_shutdown().
 * Created by xen_late_startup() when not running as the initial domain.
 */
taskq_t *xen_shutdown_tq;
725843e1988Sjohnlev
/*
 * Codes for the commands accepted on the "control/shutdown" node.  Valid
 * codes lie strictly between SHUTDOWN_INVALID and SHUTDOWN_MAX.
 */
#define	SHUTDOWN_INVALID	-1
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

/* Seconds xen_shutdown() waits before its fallback timeout fires. */
#define	SHUTDOWN_TIMEOUT_SECS	(60 * 5)

/* Command names, indexed by SHUTDOWN_* code. */
static const char *cmd_strings[SHUTDOWN_MAX] = {
	[SHUTDOWN_POWEROFF]	= "poweroff",
	[SHUTDOWN_REBOOT]	= "reboot",
	[SHUTDOWN_SUSPEND]	= "suspend",
	[SHUTDOWN_HALT]		= "halt"
};
741843e1988Sjohnlev
742843e1988Sjohnlev static void
xen_dirty_shutdown(void * arg)743843e1988Sjohnlev xen_dirty_shutdown(void *arg)
744843e1988Sjohnlev {
745843e1988Sjohnlev int cmd = (uintptr_t)arg;
746843e1988Sjohnlev
747843e1988Sjohnlev cmn_err(CE_WARN, "Externally requested shutdown failed or "
748843e1988Sjohnlev "timed out.\nShutting down.\n");
749843e1988Sjohnlev
750843e1988Sjohnlev switch (cmd) {
751843e1988Sjohnlev case SHUTDOWN_HALT:
752843e1988Sjohnlev case SHUTDOWN_POWEROFF:
753843e1988Sjohnlev (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
754843e1988Sjohnlev break;
755843e1988Sjohnlev case SHUTDOWN_REBOOT:
756843e1988Sjohnlev (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
757843e1988Sjohnlev break;
758843e1988Sjohnlev }
759843e1988Sjohnlev }
760843e1988Sjohnlev
761843e1988Sjohnlev static void
xen_shutdown(void * arg)762843e1988Sjohnlev xen_shutdown(void *arg)
763843e1988Sjohnlev {
764843e1988Sjohnlev int cmd = (uintptr_t)arg;
765c48ac12eSjohnlev proc_t *initpp;
766843e1988Sjohnlev
767843e1988Sjohnlev ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
768843e1988Sjohnlev
769843e1988Sjohnlev if (cmd == SHUTDOWN_SUSPEND) {
770843e1988Sjohnlev xen_suspend_domain();
771843e1988Sjohnlev return;
772843e1988Sjohnlev }
773843e1988Sjohnlev
774c48ac12eSjohnlev switch (cmd) {
775c48ac12eSjohnlev case SHUTDOWN_POWEROFF:
776c48ac12eSjohnlev force_shutdown_method = AD_POWEROFF;
777c48ac12eSjohnlev break;
778c48ac12eSjohnlev case SHUTDOWN_HALT:
779c48ac12eSjohnlev force_shutdown_method = AD_HALT;
780c48ac12eSjohnlev break;
781c48ac12eSjohnlev case SHUTDOWN_REBOOT:
782c48ac12eSjohnlev force_shutdown_method = AD_BOOT;
783c48ac12eSjohnlev break;
784c48ac12eSjohnlev }
785843e1988Sjohnlev
786c48ac12eSjohnlev /*
787c48ac12eSjohnlev * If we're still booting and init(1) isn't set up yet, simply halt.
788c48ac12eSjohnlev */
789c48ac12eSjohnlev mutex_enter(&pidlock);
790c48ac12eSjohnlev initpp = prfind(P_INITPID);
791c48ac12eSjohnlev mutex_exit(&pidlock);
792c48ac12eSjohnlev if (initpp == NULL) {
793c48ac12eSjohnlev extern void halt(char *);
794c48ac12eSjohnlev halt("Power off the System"); /* just in case */
795c48ac12eSjohnlev }
796843e1988Sjohnlev
797c48ac12eSjohnlev /*
798c48ac12eSjohnlev * else, graceful shutdown with inittab and all getting involved
799c48ac12eSjohnlev */
800c48ac12eSjohnlev psignal(initpp, SIGPWR);
801843e1988Sjohnlev
802843e1988Sjohnlev (void) timeout(xen_dirty_shutdown, arg,
803843e1988Sjohnlev SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
804843e1988Sjohnlev }
805843e1988Sjohnlev
806843e1988Sjohnlev /*ARGSUSED*/
807843e1988Sjohnlev static void
xen_shutdown_handler(struct xenbus_watch * watch,const char ** vec,unsigned int len)808843e1988Sjohnlev xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
809843e1988Sjohnlev unsigned int len)
810843e1988Sjohnlev {
811843e1988Sjohnlev char *str;
812843e1988Sjohnlev xenbus_transaction_t xbt;
813843e1988Sjohnlev int err, shutdown_code = SHUTDOWN_INVALID;
814843e1988Sjohnlev unsigned int slen;
815843e1988Sjohnlev
816843e1988Sjohnlev again:
817843e1988Sjohnlev err = xenbus_transaction_start(&xbt);
818843e1988Sjohnlev if (err)
819843e1988Sjohnlev return;
820843e1988Sjohnlev if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
821843e1988Sjohnlev (void) xenbus_transaction_end(xbt, 1);
822843e1988Sjohnlev return;
823843e1988Sjohnlev }
824843e1988Sjohnlev
825843e1988Sjohnlev SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
826843e1988Sjohnlev
827843e1988Sjohnlev /*
828843e1988Sjohnlev * If this is a watch fired from our write below, check out early to
829843e1988Sjohnlev * avoid an infinite loop.
830843e1988Sjohnlev */
831843e1988Sjohnlev if (strcmp(str, "") == 0) {
832843e1988Sjohnlev (void) xenbus_transaction_end(xbt, 0);
833843e1988Sjohnlev kmem_free(str, slen);
834843e1988Sjohnlev return;
835843e1988Sjohnlev } else if (strcmp(str, "poweroff") == 0) {
836843e1988Sjohnlev shutdown_code = SHUTDOWN_POWEROFF;
837843e1988Sjohnlev } else if (strcmp(str, "reboot") == 0) {
838843e1988Sjohnlev shutdown_code = SHUTDOWN_REBOOT;
839843e1988Sjohnlev } else if (strcmp(str, "suspend") == 0) {
840843e1988Sjohnlev shutdown_code = SHUTDOWN_SUSPEND;
841843e1988Sjohnlev } else if (strcmp(str, "halt") == 0) {
842843e1988Sjohnlev shutdown_code = SHUTDOWN_HALT;
843843e1988Sjohnlev } else {
844843e1988Sjohnlev printf("Ignoring shutdown request: %s\n", str);
845843e1988Sjohnlev }
846843e1988Sjohnlev
847843e1988Sjohnlev /*
848843e1988Sjohnlev * XXPV Should we check the value of xenbus_write() too, or are all
849843e1988Sjohnlev * errors automatically folded into xenbus_transaction_end() ??
850843e1988Sjohnlev */
851843e1988Sjohnlev (void) xenbus_write(xbt, "control", "shutdown", "");
852843e1988Sjohnlev err = xenbus_transaction_end(xbt, 0);
853843e1988Sjohnlev if (err == EAGAIN) {
854843e1988Sjohnlev SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
855843e1988Sjohnlev kmem_free(str, slen);
856843e1988Sjohnlev goto again;
857843e1988Sjohnlev }
858843e1988Sjohnlev
859843e1988Sjohnlev kmem_free(str, slen);
860843e1988Sjohnlev if (shutdown_code != SHUTDOWN_INVALID) {
861843e1988Sjohnlev (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
862843e1988Sjohnlev (void *)(intptr_t)shutdown_code, 0);
863843e1988Sjohnlev }
864843e1988Sjohnlev }
865843e1988Sjohnlev
/*
 * xenbus watches registered by xen_late_startup(): shutdown_watch is set up
 * for "control/shutdown" and sysrq_watch for "control/sysrq".
 */
static struct xenbus_watch shutdown_watch;
static struct xenbus_watch sysrq_watch;
868843e1988Sjohnlev
869843e1988Sjohnlev void
xen_late_startup(void)870843e1988Sjohnlev xen_late_startup(void)
871843e1988Sjohnlev {
872843e1988Sjohnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
873843e1988Sjohnlev xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
874843e1988Sjohnlev maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
875843e1988Sjohnlev shutdown_watch.node = "control/shutdown";
876843e1988Sjohnlev shutdown_watch.callback = xen_shutdown_handler;
877843e1988Sjohnlev if (register_xenbus_watch(&shutdown_watch))
878843e1988Sjohnlev cmn_err(CE_WARN, "Failed to set shutdown watcher");
879843e1988Sjohnlev
880843e1988Sjohnlev sysrq_watch.node = "control/sysrq";
881843e1988Sjohnlev sysrq_watch.callback = xen_sysrq_handler;
882843e1988Sjohnlev if (register_xenbus_watch(&sysrq_watch))
883843e1988Sjohnlev cmn_err(CE_WARN, "Failed to set sysrq watcher");
884843e1988Sjohnlev }
885843e1988Sjohnlev balloon_init(xen_info->nr_pages);
886843e1988Sjohnlev }
887843e1988Sjohnlev
#ifdef DEBUG
#define	XEN_PRINTF_BUFSIZE 1024

/* Formatting buffer shared by every xen_printf() call. */
char xen_printf_buffer[XEN_PRINTF_BUFSIZE];

/*
 * printf-style output sent straight to the hypervisor console via
 * HYPERVISOR_console_io().  For DomU it only works when running on a xen
 * hypervisor built with debug on.  Works always since no I/O ring
 * interaction is needed.
 *
 * The message is formatted into xen_printf_buffer, so at most
 * XEN_PRINTF_BUFSIZE - 1 characters are emitted per call.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	(void) vsnprintf(xen_printf_buffer, sizeof (xen_printf_buffer),
	    fmt, args);
	va_end(args);

	(void) HYPERVISOR_console_io(CONSOLEIO_write,
	    strlen(xen_printf_buffer), xen_printf_buffer);
}
#else
/* Without DEBUG, xen_printf() discards its arguments. */
void
xen_printf(const char *fmt, ...)
{
}
#endif /* DEBUG */
917843e1988Sjohnlev
918843e1988Sjohnlev void
startup_xen_version(void)919ab4a9bebSjohnlev startup_xen_version(void)
920843e1988Sjohnlev {
9219e839ce9Sgarypen xen_set_version(XENVER_BOOT_IDX);
9229e839ce9Sgarypen if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
9239e839ce9Sgarypen cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
9249e839ce9Sgarypen "but need at least version v3.0.4",
9259e839ce9Sgarypen XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
9269e839ce9Sgarypen XENVER_CURRENT(xv_ver));
927ab4a9bebSjohnlev xen_pte_workaround();
928843e1988Sjohnlev }
929843e1988Sjohnlev
930e4b86885SCheng Sean Ye int xen_mca_simulate_mc_physinfo_failure = 0;
931e4b86885SCheng Sean Ye
932e4b86885SCheng Sean Ye void
startup_xen_mca(void)933e4b86885SCheng Sean Ye startup_xen_mca(void)
934e4b86885SCheng Sean Ye {
935e4b86885SCheng Sean Ye if (!DOMAIN_IS_INITDOMAIN(xen_info))
936e4b86885SCheng Sean Ye return;
937e4b86885SCheng Sean Ye
938e4b86885SCheng Sean Ye xen_phys_ncpus = 0;
939e4b86885SCheng Sean Ye xen_phys_cpus = NULL;
940e4b86885SCheng Sean Ye
941e4b86885SCheng Sean Ye if (xen_mca_simulate_mc_physinfo_failure ||
942e4b86885SCheng Sean Ye xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) {
943e4b86885SCheng Sean Ye cmn_err(CE_WARN,
944e4b86885SCheng Sean Ye "%sxen_get_mc_physinfo failure during xen MCA startup: "
945e4b86885SCheng Sean Ye "there will be no machine check support",
946e4b86885SCheng Sean Ye xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : "");
947e4b86885SCheng Sean Ye return;
948e4b86885SCheng Sean Ye }
949e4b86885SCheng Sean Ye
950e4b86885SCheng Sean Ye xen_phys_cpus = kmem_alloc(xen_phys_ncpus *
951e4b86885SCheng Sean Ye sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);
952e4b86885SCheng Sean Ye
953e4b86885SCheng Sean Ye if (xen_phys_cpus == NULL) {
954e4b86885SCheng Sean Ye cmn_err(CE_WARN,
955349b53ddSStuart Maybee "xen_get_mc_physinfo failure: can't allocate CPU array");
956e4b86885SCheng Sean Ye return;
957e4b86885SCheng Sean Ye }
958e4b86885SCheng Sean Ye
959e4b86885SCheng Sean Ye if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) {
960e4b86885SCheng Sean Ye cmn_err(CE_WARN, "xen_get_mc_physinfo failure: no "
961e4b86885SCheng Sean Ye "physical CPU info");
962e4b86885SCheng Sean Ye kmem_free(xen_phys_cpus,
963e4b86885SCheng Sean Ye xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
964e4b86885SCheng Sean Ye xen_phys_ncpus = 0;
965e4b86885SCheng Sean Ye xen_phys_cpus = NULL;
966e4b86885SCheng Sean Ye }
967e4b86885SCheng Sean Ye
968e4b86885SCheng Sean Ye if (xen_physinfo_debug) {
969e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp;
970e4b86885SCheng Sean Ye unsigned i;
971e4b86885SCheng Sean Ye
972e4b86885SCheng Sean Ye cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n",
973e4b86885SCheng Sean Ye xen_phys_ncpus);
974e4b86885SCheng Sean Ye for (i = 0; i < xen_phys_ncpus; i++) {
975e4b86885SCheng Sean Ye xcp = &xen_phys_cpus[i];
976e4b86885SCheng Sean Ye cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u",
977e4b86885SCheng Sean Ye xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid,
978e4b86885SCheng Sean Ye xcp->mc_threadid, xcp->mc_apicid);
979e4b86885SCheng Sean Ye }
980e4b86885SCheng Sean Ye }
981e4b86885SCheng Sean Ye }
982e4b86885SCheng Sean Ye
983843e1988Sjohnlev /*
984843e1988Sjohnlev * Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
985843e1988Sjohnlev */
986843e1988Sjohnlev
987843e1988Sjohnlev void
xen_set_gdt(ulong_t * frame_list,int entries)988843e1988Sjohnlev xen_set_gdt(ulong_t *frame_list, int entries)
989843e1988Sjohnlev {
990843e1988Sjohnlev int err;
991843e1988Sjohnlev if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) {
992843e1988Sjohnlev /*
993843e1988Sjohnlev * X_EINVAL: reserved entry or bad frames
994843e1988Sjohnlev * X_EFAULT: bad address
995843e1988Sjohnlev */
996843e1988Sjohnlev panic("xen_set_gdt(%p, %d): error %d",
997843e1988Sjohnlev (void *)frame_list, entries, -(int)err);
998843e1988Sjohnlev }
999843e1988Sjohnlev }
1000843e1988Sjohnlev
1001843e1988Sjohnlev void
xen_set_ldt(user_desc_t * ldt,uint_t nsels)1002843e1988Sjohnlev xen_set_ldt(user_desc_t *ldt, uint_t nsels)
1003843e1988Sjohnlev {
1004843e1988Sjohnlev struct mmuext_op op;
1005843e1988Sjohnlev long err;
1006843e1988Sjohnlev
1007843e1988Sjohnlev op.cmd = MMUEXT_SET_LDT;
1008843e1988Sjohnlev op.arg1.linear_addr = (uintptr_t)ldt;
1009843e1988Sjohnlev op.arg2.nr_ents = nsels;
1010843e1988Sjohnlev
1011843e1988Sjohnlev if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) {
1012843e1988Sjohnlev panic("xen_set_ldt(%p, %d): error %d",
1013843e1988Sjohnlev (void *)ldt, nsels, -(int)err);
1014843e1988Sjohnlev }
1015843e1988Sjohnlev }
1016843e1988Sjohnlev
1017843e1988Sjohnlev void
xen_stack_switch(ulong_t ss,ulong_t esp)1018843e1988Sjohnlev xen_stack_switch(ulong_t ss, ulong_t esp)
1019843e1988Sjohnlev {
1020843e1988Sjohnlev long err;
1021843e1988Sjohnlev
1022843e1988Sjohnlev if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) {
1023843e1988Sjohnlev /*
1024843e1988Sjohnlev * X_EPERM: bad selector
1025843e1988Sjohnlev */
1026843e1988Sjohnlev panic("xen_stack_switch(%lx, %lx): error %d", ss, esp,
1027843e1988Sjohnlev -(int)err);
1028843e1988Sjohnlev }
1029843e1988Sjohnlev }
1030843e1988Sjohnlev
1031843e1988Sjohnlev long
xen_set_trap_table(trap_info_t * table)1032843e1988Sjohnlev xen_set_trap_table(trap_info_t *table)
1033843e1988Sjohnlev {
1034843e1988Sjohnlev long err;
1035843e1988Sjohnlev
1036843e1988Sjohnlev if ((err = HYPERVISOR_set_trap_table(table)) != 0) {
1037843e1988Sjohnlev /*
1038843e1988Sjohnlev * X_EFAULT: bad address
1039843e1988Sjohnlev * X_EPERM: bad selector
1040843e1988Sjohnlev */
1041843e1988Sjohnlev panic("xen_set_trap_table(%p): error %d", (void *)table,
1042843e1988Sjohnlev -(int)err);
1043843e1988Sjohnlev }
1044843e1988Sjohnlev return (err);
1045843e1988Sjohnlev }
1046843e1988Sjohnlev
1047843e1988Sjohnlev #if defined(__amd64)
1048843e1988Sjohnlev void
xen_set_segment_base(int reg,ulong_t value)1049843e1988Sjohnlev xen_set_segment_base(int reg, ulong_t value)
1050843e1988Sjohnlev {
1051843e1988Sjohnlev long err;
1052843e1988Sjohnlev
1053843e1988Sjohnlev if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) {
1054843e1988Sjohnlev /*
1055843e1988Sjohnlev * X_EFAULT: bad address
1056843e1988Sjohnlev * X_EINVAL: bad type
1057843e1988Sjohnlev */
1058843e1988Sjohnlev panic("xen_set_segment_base(%d, %lx): error %d",
1059843e1988Sjohnlev reg, value, -(int)err);
1060843e1988Sjohnlev }
1061843e1988Sjohnlev }
1062843e1988Sjohnlev #endif /* __amd64 */
1063843e1988Sjohnlev
1064843e1988Sjohnlev /*
1065843e1988Sjohnlev * Translate a hypervisor errcode to a Solaris error code.
1066843e1988Sjohnlev */
1067843e1988Sjohnlev int
xen_xlate_errcode(int error)1068843e1988Sjohnlev xen_xlate_errcode(int error)
1069843e1988Sjohnlev {
1070843e1988Sjohnlev switch (-error) {
1071843e1988Sjohnlev
1072843e1988Sjohnlev /*
1073843e1988Sjohnlev * Translate hypervisor errno's into native errno's
1074843e1988Sjohnlev */
1075843e1988Sjohnlev
1076843e1988Sjohnlev #define CASE(num) case X_##num: error = num; break
1077843e1988Sjohnlev
1078843e1988Sjohnlev CASE(EPERM); CASE(ENOENT); CASE(ESRCH);
1079843e1988Sjohnlev CASE(EINTR); CASE(EIO); CASE(ENXIO);
1080843e1988Sjohnlev CASE(E2BIG); CASE(ENOMEM); CASE(EACCES);
1081843e1988Sjohnlev CASE(EFAULT); CASE(EBUSY); CASE(EEXIST);
1082843e1988Sjohnlev CASE(ENODEV); CASE(EISDIR); CASE(EINVAL);
1083843e1988Sjohnlev CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS);
1084843e1988Sjohnlev CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN);
1085349b53ddSStuart Maybee CASE(ENODATA); CASE(EAGAIN);
1086843e1988Sjohnlev
1087843e1988Sjohnlev #undef CASE
1088843e1988Sjohnlev
1089843e1988Sjohnlev default:
1090843e1988Sjohnlev panic("xen_xlate_errcode: unknown error %d", error);
1091843e1988Sjohnlev }
1092843e1988Sjohnlev
1093843e1988Sjohnlev return (error);
1094843e1988Sjohnlev }
1095843e1988Sjohnlev
1096843e1988Sjohnlev /*
1097843e1988Sjohnlev * Raise PS_IOPL on current vcpu to user level.
1098843e1988Sjohnlev * Caller responsible for preventing kernel preemption.
1099843e1988Sjohnlev */
1100843e1988Sjohnlev void
xen_enable_user_iopl(void)1101843e1988Sjohnlev xen_enable_user_iopl(void)
1102843e1988Sjohnlev {
1103843e1988Sjohnlev physdev_set_iopl_t set_iopl;
1104843e1988Sjohnlev set_iopl.iopl = 3; /* user ring 3 */
1105843e1988Sjohnlev (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1106843e1988Sjohnlev }
1107843e1988Sjohnlev
1108843e1988Sjohnlev /*
1109843e1988Sjohnlev * Drop PS_IOPL on current vcpu to kernel level
1110843e1988Sjohnlev */
1111843e1988Sjohnlev void
xen_disable_user_iopl(void)1112843e1988Sjohnlev xen_disable_user_iopl(void)
1113843e1988Sjohnlev {
1114843e1988Sjohnlev physdev_set_iopl_t set_iopl;
1115843e1988Sjohnlev set_iopl.iopl = 1; /* kernel pseudo ring 1 */
1116843e1988Sjohnlev (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1117843e1988Sjohnlev }
1118843e1988Sjohnlev
1119843e1988Sjohnlev int
xen_gdt_setprot(cpu_t * cp,uint_t prot)1120843e1988Sjohnlev xen_gdt_setprot(cpu_t *cp, uint_t prot)
1121843e1988Sjohnlev {
1122843e1988Sjohnlev int err;
1123843e1988Sjohnlev #if defined(__amd64)
1124843e1988Sjohnlev int pt_bits = PT_VALID;
1125843e1988Sjohnlev if (prot & PROT_WRITE)
1126843e1988Sjohnlev pt_bits |= PT_WRITABLE;
1127843e1988Sjohnlev #endif
1128843e1988Sjohnlev
1129843e1988Sjohnlev if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt,
1130843e1988Sjohnlev MMU_PAGESIZE, prot)) != 0)
1131843e1988Sjohnlev goto done;
1132843e1988Sjohnlev
1133843e1988Sjohnlev #if defined(__amd64)
1134843e1988Sjohnlev err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);
1135843e1988Sjohnlev #endif
1136843e1988Sjohnlev
1137843e1988Sjohnlev done:
1138843e1988Sjohnlev if (err) {
1139843e1988Sjohnlev cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
1140843e1988Sjohnlev cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
1141843e1988Sjohnlev err);
1142843e1988Sjohnlev }
1143843e1988Sjohnlev
1144843e1988Sjohnlev return (err);
1145843e1988Sjohnlev }
1146843e1988Sjohnlev
1147843e1988Sjohnlev int
xen_ldt_setprot(user_desc_t * ldt,size_t lsize,uint_t prot)1148843e1988Sjohnlev xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
1149843e1988Sjohnlev {
1150843e1988Sjohnlev int err;
1151843e1988Sjohnlev caddr_t lva = (caddr_t)ldt;
1152843e1988Sjohnlev #if defined(__amd64)
1153843e1988Sjohnlev int pt_bits = PT_VALID;
1154843e1988Sjohnlev pgcnt_t npgs;
1155843e1988Sjohnlev if (prot & PROT_WRITE)
1156843e1988Sjohnlev pt_bits |= PT_WRITABLE;
1157843e1988Sjohnlev #endif /* __amd64 */
1158843e1988Sjohnlev
1159843e1988Sjohnlev if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
1160843e1988Sjohnlev goto done;
1161843e1988Sjohnlev
1162843e1988Sjohnlev #if defined(__amd64)
1163843e1988Sjohnlev
1164843e1988Sjohnlev ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
1165843e1988Sjohnlev npgs = mmu_btop(lsize);
1166843e1988Sjohnlev while (npgs--) {
1167843e1988Sjohnlev if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
1168843e1988Sjohnlev pt_bits)) != 0)
1169843e1988Sjohnlev break;
1170843e1988Sjohnlev lva += PAGESIZE;
1171843e1988Sjohnlev }
1172843e1988Sjohnlev #endif /* __amd64 */
1173843e1988Sjohnlev
1174843e1988Sjohnlev done:
1175843e1988Sjohnlev if (err) {
1176843e1988Sjohnlev cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
1177843e1988Sjohnlev (void *)lva,
1178843e1988Sjohnlev (prot & PROT_WRITE) ? "writable" : "read-only", err);
1179843e1988Sjohnlev }
1180843e1988Sjohnlev
1181843e1988Sjohnlev return (err);
1182843e1988Sjohnlev }
1183e4b86885SCheng Sean Ye
1184e4b86885SCheng Sean Ye int
xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t * log_cpus,uint_t * ncpus)1185e4b86885SCheng Sean Ye xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus)
1186e4b86885SCheng Sean Ye {
1187*ad09f8b8SMark Johnson xen_mc_t xmc;
1188*ad09f8b8SMark Johnson struct xen_mc_physcpuinfo *cpi = &xmc.u.mc_physcpuinfo;
1189e4b86885SCheng Sean Ye
1190*ad09f8b8SMark Johnson cpi->ncpus = *ncpus;
1191e4b86885SCheng Sean Ye /*LINTED: constant in conditional context*/
1192*ad09f8b8SMark Johnson set_xen_guest_handle(cpi->info, log_cpus);
1193e4b86885SCheng Sean Ye
1194*ad09f8b8SMark Johnson if (HYPERVISOR_mca(XEN_MC_physcpuinfo, &xmc) != 0)
1195e4b86885SCheng Sean Ye return (-1);
1196e4b86885SCheng Sean Ye
1197*ad09f8b8SMark Johnson *ncpus = cpi->ncpus;
1198e4b86885SCheng Sean Ye return (0);
1199e4b86885SCheng Sean Ye }
1200e4b86885SCheng Sean Ye
/*
 * Hand str to xen_printf() for output.
 *
 * str is passed as the argument of a fixed "%s" format rather than as the
 * format itself, so any '%' in the message is printed literally instead of
 * being treated as a conversion specification that would consume
 * nonexistent arguments.
 */
void
print_panic(const char *str)
{
	xen_printf("%s", str);
}
1206e4b86885SCheng Sean Ye
1207e4b86885SCheng Sean Ye /*
1208e4b86885SCheng Sean Ye * Interfaces to iterate over real cpu information, but only that info
1209e4b86885SCheng Sean Ye * which we choose to expose here. These are of interest to dom0
1210e4b86885SCheng Sean Ye * only (and the backing hypercall should not work for domu).
1211e4b86885SCheng Sean Ye */
1212e4b86885SCheng Sean Ye
1213e4b86885SCheng Sean Ye xen_mc_lcpu_cookie_t
xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)1214e4b86885SCheng Sean Ye xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)
1215e4b86885SCheng Sean Ye {
1216e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie;
1217e4b86885SCheng Sean Ye
1218e4b86885SCheng Sean Ye if (!DOMAIN_IS_INITDOMAIN(xen_info))
1219e4b86885SCheng Sean Ye return (NULL);
1220e4b86885SCheng Sean Ye
1221e4b86885SCheng Sean Ye if (cookie == NULL)
1222e4b86885SCheng Sean Ye return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);
1223e4b86885SCheng Sean Ye
1224e4b86885SCheng Sean Ye if (xcp == xen_phys_cpus + xen_phys_ncpus - 1)
1225e4b86885SCheng Sean Ye return (NULL);
1226e4b86885SCheng Sean Ye else
1227e4b86885SCheng Sean Ye return ((xen_mc_lcpu_cookie_t)++xcp);
1228e4b86885SCheng Sean Ye }
1229e4b86885SCheng Sean Ye
/* Convert an iteration cookie back to the cpu record pointer it holds. */
#define	COOKIE2XCP(c) ((xen_mc_logical_cpu_t *)(c))
1231e4b86885SCheng Sean Ye
1232e4b86885SCheng Sean Ye const char *
xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)1233e4b86885SCheng Sean Ye xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)
1234e4b86885SCheng Sean Ye {
1235e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);
1236e4b86885SCheng Sean Ye
1237e4b86885SCheng Sean Ye return ((const char *)&xcp->mc_vendorid[0]);
1238e4b86885SCheng Sean Ye }
1239e4b86885SCheng Sean Ye
1240e4b86885SCheng Sean Ye int
xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)1241e4b86885SCheng Sean Ye xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)
1242e4b86885SCheng Sean Ye {
1243e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_family);
1244e4b86885SCheng Sean Ye }
1245e4b86885SCheng Sean Ye
1246e4b86885SCheng Sean Ye int
xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)1247e4b86885SCheng Sean Ye xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)
1248e4b86885SCheng Sean Ye {
1249e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_model);
1250e4b86885SCheng Sean Ye }
1251e4b86885SCheng Sean Ye
1252e4b86885SCheng Sean Ye int
xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)1253e4b86885SCheng Sean Ye xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)
1254e4b86885SCheng Sean Ye {
1255e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_step);
1256e4b86885SCheng Sean Ye }
1257e4b86885SCheng Sean Ye
1258e4b86885SCheng Sean Ye id_t
xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)1259e4b86885SCheng Sean Ye xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)
1260e4b86885SCheng Sean Ye {
1261e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_chipid);
1262e4b86885SCheng Sean Ye }
1263e4b86885SCheng Sean Ye
1264e4b86885SCheng Sean Ye id_t
xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)1265e4b86885SCheng Sean Ye xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)
1266e4b86885SCheng Sean Ye {
1267e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_coreid);
1268e4b86885SCheng Sean Ye }
1269e4b86885SCheng Sean Ye
1270e4b86885SCheng Sean Ye id_t
xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)1271e4b86885SCheng Sean Ye xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)
1272e4b86885SCheng Sean Ye {
1273e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_threadid);
1274e4b86885SCheng Sean Ye }
1275e4b86885SCheng Sean Ye
1276e4b86885SCheng Sean Ye id_t
xen_physcpu_initial_apicid(xen_mc_lcpu_cookie_t cookie)1277074bb90dSTom Pothier xen_physcpu_initial_apicid(xen_mc_lcpu_cookie_t cookie)
1278074bb90dSTom Pothier {
1279074bb90dSTom Pothier return (COOKIE2XCP(cookie)->mc_clusterid);
1280074bb90dSTom Pothier }
1281074bb90dSTom Pothier
1282074bb90dSTom Pothier id_t
xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)1283e4b86885SCheng Sean Ye xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)
1284e4b86885SCheng Sean Ye {
1285e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_cpunr);
1286e4b86885SCheng Sean Ye }
1287e4b86885SCheng Sean Ye
1288e4b86885SCheng Sean Ye boolean_t
xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)1289e4b86885SCheng Sean Ye xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)
1290e4b86885SCheng Sean Ye {
1291e4b86885SCheng Sean Ye return (COOKIE2XCP(cookie)->mc_nthreads > 1);
1292e4b86885SCheng Sean Ye }
1293e4b86885SCheng Sean Ye
1294e4b86885SCheng Sean Ye uint64_t
xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)1295e4b86885SCheng Sean Ye xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)
1296e4b86885SCheng Sean Ye {
1297e4b86885SCheng Sean Ye xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);
1298e4b86885SCheng Sean Ye
1299e4b86885SCheng Sean Ye /*
1300e4b86885SCheng Sean Ye * Need to #define the indices, or search through the array.
1301e4b86885SCheng Sean Ye */
1302e4b86885SCheng Sean Ye return (xcp->mc_msrvalues[0].value);
1303e4b86885SCheng Sean Ye }
13047eea693dSMark Johnson
13057eea693dSMark Johnson int
xen_map_gref(uint_t cmd,gnttab_map_grant_ref_t * mapop,uint_t count,boolean_t uvaddr)13067eea693dSMark Johnson xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
13077eea693dSMark Johnson boolean_t uvaddr)
13087eea693dSMark Johnson {
13097eea693dSMark Johnson long rc;
1310349b53ddSStuart Maybee uint_t i;
13117eea693dSMark Johnson
13127eea693dSMark Johnson ASSERT(cmd == GNTTABOP_map_grant_ref);
13137eea693dSMark Johnson
13147eea693dSMark Johnson #if !defined(_BOOT)
1315349b53ddSStuart Maybee if (uvaddr == B_FALSE) {
13167eea693dSMark Johnson for (i = 0; i < count; ++i) {
1317349b53ddSStuart Maybee mapop[i].flags |= (PT_FOREIGN <<_GNTMAP_guest_avail0);
13187eea693dSMark Johnson }
13197eea693dSMark Johnson }
13207eea693dSMark Johnson #endif
13217eea693dSMark Johnson
1322349b53ddSStuart Maybee rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
1323349b53ddSStuart Maybee
13247eea693dSMark Johnson return (rc);
13257eea693dSMark Johnson }
1326349b53ddSStuart Maybee
1327349b53ddSStuart Maybee static int
xpv_get_physinfo(xen_sysctl_physinfo_t * pi)1328349b53ddSStuart Maybee xpv_get_physinfo(xen_sysctl_physinfo_t *pi)
1329349b53ddSStuart Maybee {
1330349b53ddSStuart Maybee xen_sysctl_t op;
1331349b53ddSStuart Maybee struct sp { void *p; } *sp = (struct sp *)&op.u.physinfo.cpu_to_node;
1332349b53ddSStuart Maybee int ret;
1333349b53ddSStuart Maybee
1334349b53ddSStuart Maybee bzero(&op, sizeof (op));
1335349b53ddSStuart Maybee op.cmd = XEN_SYSCTL_physinfo;
1336349b53ddSStuart Maybee op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
1337349b53ddSStuart Maybee /*LINTED: constant in conditional context*/
1338349b53ddSStuart Maybee set_xen_guest_handle(*sp, NULL);
1339349b53ddSStuart Maybee
1340349b53ddSStuart Maybee ret = HYPERVISOR_sysctl(&op);
1341349b53ddSStuart Maybee
1342349b53ddSStuart Maybee if (ret != 0)
1343349b53ddSStuart Maybee return (xen_xlate_errcode(ret));
1344349b53ddSStuart Maybee
1345349b53ddSStuart Maybee bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo));
1346349b53ddSStuart Maybee return (0);
1347349b53ddSStuart Maybee }
1348349b53ddSStuart Maybee
1349349b53ddSStuart Maybee /*
1350349b53ddSStuart Maybee * On dom0, we can determine the number of physical cpus on the machine.
1351349b53ddSStuart Maybee * This number is important when figuring out what workarounds are
1352349b53ddSStuart Maybee * appropriate, so compute it now.
1353349b53ddSStuart Maybee */
1354349b53ddSStuart Maybee uint_t
xpv_nr_phys_cpus(void)1355349b53ddSStuart Maybee xpv_nr_phys_cpus(void)
1356349b53ddSStuart Maybee {
1357349b53ddSStuart Maybee static uint_t nphyscpus = 0;
1358349b53ddSStuart Maybee
1359349b53ddSStuart Maybee ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1360349b53ddSStuart Maybee
1361349b53ddSStuart Maybee if (nphyscpus == 0) {
1362349b53ddSStuart Maybee xen_sysctl_physinfo_t pi;
1363349b53ddSStuart Maybee int ret;
1364349b53ddSStuart Maybee
1365349b53ddSStuart Maybee if ((ret = xpv_get_physinfo(&pi)) != 0)
1366349b53ddSStuart Maybee panic("xpv_get_physinfo() failed: %d\n", ret);
1367349b53ddSStuart Maybee nphyscpus = pi.nr_cpus;
1368349b53ddSStuart Maybee }
1369349b53ddSStuart Maybee return (nphyscpus);
1370349b53ddSStuart Maybee }
1371349b53ddSStuart Maybee
1372349b53ddSStuart Maybee pgcnt_t
xpv_nr_phys_pages(void)1373349b53ddSStuart Maybee xpv_nr_phys_pages(void)
1374349b53ddSStuart Maybee {
1375349b53ddSStuart Maybee xen_sysctl_physinfo_t pi;
1376349b53ddSStuart Maybee int ret;
1377349b53ddSStuart Maybee
1378349b53ddSStuart Maybee ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1379349b53ddSStuart Maybee
1380349b53ddSStuart Maybee if ((ret = xpv_get_physinfo(&pi)) != 0)
1381349b53ddSStuart Maybee panic("xpv_get_physinfo() failed: %d\n", ret);
1382349b53ddSStuart Maybee
1383349b53ddSStuart Maybee return ((pgcnt_t)pi.total_pages);
1384349b53ddSStuart Maybee }
1385349b53ddSStuart Maybee
1386349b53ddSStuart Maybee uint64_t
xpv_cpu_khz(void)1387349b53ddSStuart Maybee xpv_cpu_khz(void)
1388349b53ddSStuart Maybee {
1389349b53ddSStuart Maybee xen_sysctl_physinfo_t pi;
1390349b53ddSStuart Maybee int ret;
1391349b53ddSStuart Maybee
1392349b53ddSStuart Maybee ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1393349b53ddSStuart Maybee
1394349b53ddSStuart Maybee if ((ret = xpv_get_physinfo(&pi)) != 0)
1395349b53ddSStuart Maybee panic("xpv_get_physinfo() failed: %d\n", ret);
1396349b53ddSStuart Maybee return ((uint64_t)pi.cpu_khz);
1397349b53ddSStuart Maybee }
1398