/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* derived from netbsd's xen_machdep.c 1.1.2.1 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

#include <sys/xpv_user.h>

/* XXX 3.3. TODO remove this include */
#include <xen/public/arch-x86/xen-mca.h>

#include <sys/ctype.h>
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/trap.h>
#include <sys/segments.h>
#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/bootconf.h>
#include <sys/bootinfo.h>
#include <sys/cpr.h>
#include <sys/taskq.h>
#include <sys/uadmin.h>
#include <sys/evtchn_impl.h>
#include <sys/archsystm.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/mach_mmu.h>
#include <vm/hat_i86.h>
#include <sys/gnttab.h>
#include <sys/reboot.h>
#include <sys/stack.h>
#include <sys/clock.h>
#include <sys/bitmap.h>
#include <sys/processor.h>
#include <sys/xen_errno.h>
#include <sys/xpv_panic.h>
#include <sys/smp_impldefs.h>
#include <sys/cpu.h>
#include <sys/balloon_impl.h>
#include <sys/ddi.h>

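/*
 * SUSPEND_DEBUG() compiles to nothing in non-DEBUG kernels; in DEBUG
 * kernels its output is additionally gated at runtime by the
 * xen_suspend_debug tunable below.
 */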
#ifdef DEBUG
#define	SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
#else
#define	SUSPEND_DEBUG(...)
#endif

int cpr_debug;
cpuset_t cpu_suspend_lost_set;
static int xen_suspend_debug;

uint_t xen_phys_ncpus;
xen_mc_logical_cpu_t *xen_phys_cpus;
int xen_physinfo_debug = 0;

/*
 * Determine helpful version information.
 *
 * (And leave copies in the data segment so we can look at them later
 * with e.g. kmdb.)
 */

typedef enum xen_version {
	XENVER_BOOT_IDX,
	XENVER_CURRENT_IDX
} xen_version_t;

struct xenver {
	ulong_t xv_major;
	ulong_t xv_minor;
	ulong_t xv_revision;
	xen_extraversion_t xv_ver;
	ulong_t xv_is_xvm;
	xen_changeset_info_t xv_chgset;
	xen_compile_info_t xv_build;
	xen_capabilities_info_t xv_caps;
} xenver[2];

#define	XENVER_BOOT(m)		(xenver[XENVER_BOOT_IDX].m)
#define	XENVER_CURRENT(m)	(xenver[XENVER_CURRENT_IDX].m)
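/*
 * For example, XENVER_CURRENT(xv_major) expands to
 * xenver[XENVER_CURRENT_IDX].xv_major.
 */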

/*
 * Update the xenver data. We maintain two copies, boot and
 * current. If we are setting the boot, then also set current.
 */
static void
xen_set_version(xen_version_t idx)
{
	ulong_t ver;

	bzero(&xenver[idx], sizeof (xenver[idx]));

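	/*
	 * XENVER_version packs the hypervisor's major number into the
	 * upper 16 bits of the return value and the minor number into
	 * the lower 16 bits; the BITX() calls below unpack them.
	 */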
	ver = HYPERVISOR_xen_version(XENVER_version, 0);

	xenver[idx].xv_major = BITX(ver, 31, 16);
	xenver[idx].xv_minor = BITX(ver, 15, 0);

	(void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver);

	/*
	 * The revision is buried in the extraversion information that is
	 * maintained by the hypervisor. For our purposes we expect that
	 * the revision number is:
	 *	- the second character in the extraversion information
	 *	- one character long
	 *	- a numeric digit
	 * If it isn't, then we can't extract the revision and we leave it
	 * set to 0.
	 */
	if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1]))
		xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0';
	else
		cmn_err(CE_WARN, "Cannot extract revision on this hypervisor "
		    "version: v%s, unexpected version format",
		    xenver[idx].xv_ver);

	xenver[idx].xv_is_xvm = 0;

	if (strstr(xenver[idx].xv_ver, "-xvm") != NULL)
		xenver[idx].xv_is_xvm = 1;

	(void) HYPERVISOR_xen_version(XENVER_changeset,
	    &xenver[idx].xv_chgset);

	(void) HYPERVISOR_xen_version(XENVER_compile_info,
	    &xenver[idx].xv_build);
	/*
	 * Capabilities are a set of space-separated ASCII strings,
	 * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'.
	 */
	(void) HYPERVISOR_xen_version(XENVER_capabilities,
	    &xenver[idx].xv_caps);

	cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major,
	    xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset);

	if (idx == XENVER_BOOT_IDX)
		bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX],
		    sizeof (xenver[XENVER_BOOT_IDX]));
}

typedef enum xen_hypervisor_check {
	XEN_RUN_CHECK,
	XEN_SUSPEND_CHECK
} xen_hypervisor_check_t;

/*
 * To run, the hypervisor must be 3.0.4 or better. To suspend/resume
 * we need 3.0.4 or better, and if it is 3.0.4, then it must be provided
 * by the Solaris xVM project.
 * Checking can be disabled for testing purposes by setting the
 * xen_suspend_debug variable.
 */
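/*
 * For example: 3.0.3 fails both checks; a stock 3.0.4 passes the run
 * check but not the suspend check; an -xvm 3.0.4 and anything 3.1 or
 * later pass both.
 */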
static int
xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)
{
	if (xen_suspend_debug == 1)
		return (1);
	if (XENVER_CURRENT(xv_major) < 3)
		return (0);
	if (XENVER_CURRENT(xv_major) > 3)
		return (1);
	if (XENVER_CURRENT(xv_minor) > 0)
		return (1);
	if (XENVER_CURRENT(xv_revision) < 4)
		return (0);
	if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 &&
	    !XENVER_CURRENT(xv_is_xvm))
		return (0);

	return (1);
}

/*
 * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
 * workaround.
 */
static void
xen_pte_workaround(void)
{
	extern int pt_kern;

	if (XENVER_CURRENT(xv_major) != 3)
		return;
	if (XENVER_CURRENT(xv_minor) > 1)
		return;
	if (XENVER_CURRENT(xv_minor) == 1 &&
	    XENVER_CURRENT(xv_revision) > 1)
		return;
	if (XENVER_CURRENT(xv_is_xvm))
		return;

	pt_kern = PT_USER;
}

void
xen_set_callback(void (*func)(void), uint_t type, uint_t flags)
{
	struct callback_register cb;

	bzero(&cb, sizeof (cb));
	cb.address = (ulong_t)func;
	cb.type = type;
	cb.flags = flags;

	/*
	 * XXPV always ignore return value for NMI
	 */
	if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 &&
	    type != CALLBACKTYPE_nmi)
		panic("HYPERVISOR_callback_op failed");
}

void
xen_init_callbacks(void)
{
	/*
	 * register event (interrupt) handler.
	 */
	xen_set_callback(xen_callback, CALLBACKTYPE_event, 0);

	/*
	 * failsafe handler.
	 */
	xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe,
	    CALLBACKF_mask_events);

	/*
	 * NMI handler.
	 */
	xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0);

	/*
	 * system call handler
	 * XXPV move to init_cpu_syscall?
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);
}

/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing with prom_printf() output.
 * XXPV: doesn't exactly help us on UP though.
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
	drv_usecwait(MICROSEC >> 2);
}

void
xen_suspend_devices(void)
{
	int rc;

	SUSPEND_DEBUG("xen_suspend_devices\n");

	if ((rc = cpr_suspend_devices(ddi_root_node())) != 0)
		panic("failed to suspend devices: %d", rc);
}

void
xen_resume_devices(void)
{
	int rc;

	SUSPEND_DEBUG("xen_resume_devices\n");

	if ((rc = cpr_resume_devices(ddi_root_node(), 0)) != 0)
		panic("failed to resume devices: %d", rc);
}

/*
 * The list of mfn pages is out of date. Recompute it.
 */
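/*
 * Three levels are involved: mfn_list is the PFN-to-MFN table itself,
 * mfn_list_pages holds the MFNs of the pages making up mfn_list, and
 * mfn_list_pages_page holds the MFNs of the pages of mfn_list_pages.
 * The shared info's pfn_to_mfn_frame_list_list is pointed at that
 * last page, which is how the dom0 save/restore tools find the table.
 */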
static void
rebuild_mfn_list(void)
{
	int i = 0;
	size_t sz;
	size_t off;
	pfn_t pfn;

	SUSPEND_DEBUG("rebuild_mfn_list\n");

	sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;

	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		size_t j = mmu_btop(off);
		if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
			pfn = hat_getpfnum(kas.a_hat,
			    (caddr_t)&mfn_list_pages[j]);
			mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
		}

		pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
		mfn_list_pages[j] = pfn_to_mfn(pfn);
	}

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
	    = pfn_to_mfn(pfn);
}

static void
suspend_cpus(void)
{
	int i;

	SUSPEND_DEBUG("suspend_cpus\n");

	mp_enter_barrier();

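	/*
	 * CPU 0 is deliberately skipped: the suspend itself must run
	 * on vcpu 0 (see xen_suspend_domain()), so only the other
	 * vcpus are taken down here.
	 */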
	for (i = 1; i < ncpus; i++) {
		if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
			SUSPEND_DEBUG("xen_vcpu_down %d\n", i);
			(void) xen_vcpu_down(i);
		}

		mach_cpucontext_reset(cpu[i]);
	}
}

static void
resume_cpus(void)
{
	int i;

	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
			SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
			mach_cpucontext_restore(cpu[i]);
			(void) xen_vcpu_up(i);
		}
	}

	mp_leave_barrier();
}

/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration. Would be nice to know the difference if
	 * possible. For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does. (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs. Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them. But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version. Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu. Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info. That remains to be written at the moment.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}

uint_t
xen_debug_handler(caddr_t arg __unused, caddr_t arg1 __unused)
{
	debug_enter("External debug event received");

	/*
	 * If we've not got KMDB loaded, output some stuff difficult to capture
	 * from a domain core.
	 */
	if (!(boothowto & RB_DEBUG)) {
		shared_info_t *si = HYPERVISOR_shared_info;
		int i;

		prom_printf("evtchn_pending [ ");
		for (i = 0; i < 8; i++)
			prom_printf("%lx ", si->evtchn_pending[i]);
		prom_printf("]\nevtchn_mask [ ");
		for (i = 0; i < 8; i++)
			prom_printf("%lx ", si->evtchn_mask[i]);
		prom_printf("]\n");

		for (i = 0; i < ncpus; i++) {
			vcpu_info_t *vcpu = &si->vcpu_info[i];
			if (cpu[i] == NULL)
				continue;
			prom_printf("CPU%d pending %d mask %d sel %lx\n",
			    i, vcpu->evtchn_upcall_pending,
			    vcpu->evtchn_upcall_mask,
			    vcpu->evtchn_pending_sel);
		}
	}

	return (0);
}

/*ARGSUSED*/
static void
xen_sysrq_handler(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
	xenbus_transaction_t xbt;
	char key = '\0';
	int ret;

retry:
	if (xenbus_transaction_start(&xbt)) {
		cmn_err(CE_WARN, "failed to start sysrq transaction");
		return;
	}

	if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) {
		/*
		 * ENOENT happens in response to our own xenbus_rm.
		 * XXPV - this happens spuriously on boot?
		 */
		if (ret != ENOENT)
			cmn_err(CE_WARN, "failed to read sysrq: %d", ret);
		goto out;
	}

	if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) {
		cmn_err(CE_WARN, "failed to reset sysrq: %d", ret);
		goto out;
	}

	if (xenbus_transaction_end(xbt, 0) == EAGAIN)
		goto retry;

	/*
	 * Somewhat arbitrary - on Linux this means 'reboot'. We could just
	 * accept any key, but this might increase the risk of sending a
	 * harmless sysrq to the wrong domain...
	 */
	if (key == 'b')
		(void) xen_debug_handler(NULL, NULL);
	else
		cmn_err(CE_WARN, "Ignored sysrq %c", key);
	return;

out:
	(void) xenbus_transaction_end(xbt, 1);
}

taskq_t *xen_shutdown_tq;

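/*
 * Shutdown codes for the requests the toolstack writes to the
 * control/shutdown xenstore node; cmd_strings below is indexed by
 * these codes.
 */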
#define	SHUTDOWN_INVALID	-1
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

#define	SHUTDOWN_TIMEOUT_SECS	(60 * 5)

static const char *cmd_strings[SHUTDOWN_MAX] = {
	"poweroff",
	"reboot",
	"suspend",
	"halt"
};

static void
xen_dirty_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;

	cmn_err(CE_WARN, "Externally requested shutdown failed or "
	    "timed out.\nShutting down.\n");

	switch (cmd) {
	case SHUTDOWN_HALT:
	case SHUTDOWN_POWEROFF:
		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
		break;
	case SHUTDOWN_REBOOT:
		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
		break;
	}
}

static void
xen_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;
	proc_t *initpp;

	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);

	if (cmd == SHUTDOWN_SUSPEND) {
		xen_suspend_domain();
		return;
	}

	switch (cmd) {
	case SHUTDOWN_POWEROFF:
		force_shutdown_method = AD_POWEROFF;
		break;
	case SHUTDOWN_HALT:
		force_shutdown_method = AD_HALT;
		break;
	case SHUTDOWN_REBOOT:
		force_shutdown_method = AD_BOOT;
		break;
	}

	/*
	 * If we're still booting and init(1) isn't set up yet, simply halt.
	 */
	mutex_enter(&pidlock);
	initpp = prfind(P_INITPID);
	mutex_exit(&pidlock);
	if (initpp == NULL) {
		extern void halt(char *);
		halt("Power off the System");	/* just in case */
	}

	/*
	 * else, graceful shutdown with inittab and all getting involved
	 */
	psignal(initpp, SIGPWR);

	(void) timeout(xen_dirty_shutdown, arg,
	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
}

/*ARGSUSED*/
static void
xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
	char *str;
	xenbus_transaction_t xbt;
	int err, shutdown_code = SHUTDOWN_INVALID;
	unsigned int slen;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return;
	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
		(void) xenbus_transaction_end(xbt, 1);
		return;
	}

	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);

	/*
	 * If this is a watch fired from our write below, check out early to
	 * avoid an infinite loop.
	 */
	if (strcmp(str, "") == 0) {
		(void) xenbus_transaction_end(xbt, 0);
		kmem_free(str, slen);
		return;
	} else if (strcmp(str, "poweroff") == 0) {
		shutdown_code = SHUTDOWN_POWEROFF;
	} else if (strcmp(str, "reboot") == 0) {
		shutdown_code = SHUTDOWN_REBOOT;
	} else if (strcmp(str, "suspend") == 0) {
		shutdown_code = SHUTDOWN_SUSPEND;
	} else if (strcmp(str, "halt") == 0) {
		shutdown_code = SHUTDOWN_HALT;
	} else {
		printf("Ignoring shutdown request: %s\n", str);
	}

	/*
	 * XXPV	Should we check the value of xenbus_write() too, or are all
	 *	errors automatically folded into xenbus_transaction_end() ??
	 */
	(void) xenbus_write(xbt, "control", "shutdown", "");
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN) {
		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
		kmem_free(str, slen);
		goto again;
	}

	kmem_free(str, slen);
	if (shutdown_code != SHUTDOWN_INVALID) {
		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
		    (void *)(intptr_t)shutdown_code, 0);
	}
}

static struct xenbus_watch shutdown_watch;
static struct xenbus_watch sysrq_watch;

void
xen_late_startup(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
		    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
		shutdown_watch.node = "control/shutdown";
		shutdown_watch.callback = xen_shutdown_handler;
		if (register_xenbus_watch(&shutdown_watch))
			cmn_err(CE_WARN, "Failed to set shutdown watcher");

		sysrq_watch.node = "control/sysrq";
		sysrq_watch.callback = xen_sysrq_handler;
		if (register_xenbus_watch(&sysrq_watch))
			cmn_err(CE_WARN, "Failed to set sysrq watcher");
	}
	balloon_init(xen_info->nr_pages);
}

#ifdef DEBUG
#define	XEN_PRINTF_BUFSIZE	1024

char xen_printf_buffer[XEN_PRINTF_BUFSIZE];

/*
 * Printf function that calls the hypervisor directly. For a domU it
 * only produces output on a hypervisor built with debugging enabled.
 * It works at any time, since no I/O ring interaction is needed.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(xen_printf_buffer, XEN_PRINTF_BUFSIZE, fmt, ap);
	va_end(ap);

	(void) HYPERVISOR_console_io(CONSOLEIO_write,
	    strlen(xen_printf_buffer), xen_printf_buffer);
}
#else
void
xen_printf(const char *fmt, ...)
{
}
#endif	/* DEBUG */

void
startup_xen_version(void)
{
	xen_set_version(XENVER_BOOT_IDX);
	if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));
	xen_pte_workaround();
}

int xen_mca_simulate_mc_physinfo_failure = 0;

void
startup_xen_mca(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info))
		return;

	xen_phys_ncpus = 0;
	xen_phys_cpus = NULL;

	if (xen_mca_simulate_mc_physinfo_failure ||
	    xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) {
		cmn_err(CE_WARN,
		    "%sxen_get_mc_physinfo failure during xen MCA startup: "
		    "there will be no machine check support",
		    xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : "");
		return;
	}

	xen_phys_cpus = kmem_alloc(xen_phys_ncpus *
	    sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);

	if (xen_phys_cpus == NULL) {
		cmn_err(CE_WARN,
		    "xen_get_mc_physinfo failure: can't allocate CPU array");
		return;
	}

	if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) {
		cmn_err(CE_WARN, "xen_get_mc_physinfo failure: no "
		    "physical CPU info");
		kmem_free(xen_phys_cpus,
		    xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
		xen_phys_ncpus = 0;
		xen_phys_cpus = NULL;
	}

	if (xen_physinfo_debug) {
		xen_mc_logical_cpu_t *xcp;
		unsigned i;

		cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n",
		    xen_phys_ncpus);
		for (i = 0; i < xen_phys_ncpus; i++) {
			xcp = &xen_phys_cpus[i];
			cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u",
			    xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid,
			    xcp->mc_threadid, xcp->mc_apicid);
		}
	}
}

/*
 * Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
 */

void
xen_set_gdt(ulong_t *frame_list, int entries)
{
	int err;
	if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) {
		/*
		 * X_EINVAL:	reserved entry or bad frames
		 * X_EFAULT:	bad address
		 */
		panic("xen_set_gdt(%p, %d): error %d",
		    (void *)frame_list, entries, -(int)err);
	}
}

void
xen_set_ldt(user_desc_t *ldt, uint_t nsels)
{
	struct mmuext_op op;
	long err;

	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = (uintptr_t)ldt;
	op.arg2.nr_ents = nsels;

	if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) {
		panic("xen_set_ldt(%p, %d): error %d",
		    (void *)ldt, nsels, -(int)err);
	}
}

void
xen_stack_switch(ulong_t ss, ulong_t esp)
{
	long err;

	if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) {
		/*
		 * X_EPERM:	bad selector
		 */
		panic("xen_stack_switch(%lx, %lx): error %d", ss, esp,
		    -(int)err);
	}
}

long
xen_set_trap_table(trap_info_t *table)
{
	long err;

	if ((err = HYPERVISOR_set_trap_table(table)) != 0) {
		/*
		 * X_EFAULT:	bad address
		 * X_EPERM:	bad selector
		 */
		panic("xen_set_trap_table(%p): error %d", (void *)table,
		    -(int)err);
	}
	return (err);
}

void
xen_set_segment_base(int reg, ulong_t value)
{
	long err;

	if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) {
		/*
		 * X_EFAULT:	bad address
		 * X_EINVAL:	bad type
		 */
		panic("xen_set_segment_base(%d, %lx): error %d",
		    reg, value, -(int)err);
	}
}

/*
 * Translate a hypervisor errcode to a Solaris error code.
 */
int
xen_xlate_errcode(int error)
{
	switch (-error) {

	/*
	 * Translate hypervisor errno's into native errno's
	 */

#define	CASE(num)	case X_##num: error = num; break
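	/* e.g. CASE(EPERM) expands to: case X_EPERM: error = EPERM; break */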

	CASE(EPERM);	CASE(ENOENT);	CASE(ESRCH);
	CASE(EINTR);	CASE(EIO);	CASE(ENXIO);
	CASE(E2BIG);	CASE(ENOMEM);	CASE(EACCES);
	CASE(EFAULT);	CASE(EBUSY);	CASE(EEXIST);
	CASE(ENODEV);	CASE(EISDIR);	CASE(EINVAL);
	CASE(ENOSPC);	CASE(ESPIPE);	CASE(EROFS);
	CASE(ENOSYS);	CASE(ENOTEMPTY); CASE(EISCONN);
	CASE(ENODATA);	CASE(EAGAIN);

#undef CASE

	default:
		panic("xen_xlate_errcode: unknown error %d", error);
	}

	return (error);
}

/*
 * Raise PS_IOPL on current vcpu to user level.
 * Caller responsible for preventing kernel preemption.
 */
void
xen_enable_user_iopl(void *arg __unused)
{
	physdev_set_iopl_t set_iopl;
	set_iopl.iopl = 3;	/* user ring 3 */
	(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}

/*
 * Drop PS_IOPL on current vcpu to kernel level
 */
void
xen_disable_user_iopl(void *arg __unused)
{
	physdev_set_iopl_t set_iopl;
	set_iopl.iopl = 1;	/* kernel pseudo ring 1 */
	(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}

int
xen_gdt_setprot(cpu_t *cp, uint_t prot)
{
	int err;
	int pt_bits = PT_VALID;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;

	if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt,
	    MMU_PAGESIZE, prot)) != 0)
		goto done;

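	/*
	 * The same physical page is also reachable through the kpm
	 * mapping; update that mapping as well so no alias with
	 * different protections remains.
	 */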
	err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);

done:
	if (err) {
		cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
		    cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
		    err);
	}

	return (err);
}

int
xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
{
	int err;
	caddr_t lva = (caddr_t)ldt;
	int pt_bits = PT_VALID;
	pgcnt_t npgs;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;

	if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
		goto done;

	ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
	npgs = mmu_btop(lsize);
	while (npgs--) {
		if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
		    pt_bits)) != 0)
			break;
		lva += PAGESIZE;
	}

done:
	if (err) {
		cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
		    (void *)lva,
		    (prot & PROT_WRITE) ? "writable" : "read-only", err);
	}

	return (err);
}

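/*
 * Called twice by startup_xen_mca(): first with log_cpus == NULL to
 * learn the CPU count, then with an allocated array to fill in.
 * *ncpus is both an input (array capacity) and an output (CPUs
 * reported by the hypervisor).
 */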
int
xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus)
{
	xen_mc_t xmc;
	struct xen_mc_physcpuinfo *cpi = &xmc.u.mc_physcpuinfo;

	cpi->ncpus = *ncpus;
	/*LINTED: constant in conditional context*/
	set_xen_guest_handle(cpi->info, log_cpus);

	if (HYPERVISOR_mca(XEN_MC_physcpuinfo, &xmc) != 0)
		return (-1);

	*ncpus = cpi->ncpus;
	return (0);
}

void
print_panic(const char *str)
{
	xen_printf(str);
}

/*
 * Interfaces to iterate over real cpu information, but only that info
 * which we choose to expose here. These are of interest to dom0
 * only (and the backing hypercall should not work for domu).
 */
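/*
 * Iteration starts by passing a NULL cookie and ends when NULL is
 * returned, e.g.:
 *
 *	xen_mc_lcpu_cookie_t c;
 *
 *	for (c = xen_physcpu_next(NULL); c != NULL;
 *	    c = xen_physcpu_next(c))
 *		(void) xen_physcpu_chipid(c);
 */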

xen_mc_lcpu_cookie_t
xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)
{
	xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie;

	if (!DOMAIN_IS_INITDOMAIN(xen_info))
		return (NULL);

	if (cookie == NULL)
		return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);

	if (xcp == xen_phys_cpus + xen_phys_ncpus - 1)
		return (NULL);
	else
		return ((xen_mc_lcpu_cookie_t)++xcp);
}

#define	COOKIE2XCP(c)	((xen_mc_logical_cpu_t *)(c))

const char *
xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)
{
	xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);

	return ((const char *)&xcp->mc_vendorid[0]);
}

int
xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_family);
}

int
xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_model);
}

int
xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_step);
}

id_t
xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_chipid);
}

id_t
xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_coreid);
}

id_t
xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_threadid);
}

id_t
xen_physcpu_initial_apicid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_clusterid);
}

id_t
xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_cpunr);
}

boolean_t
xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_nthreads > 1);
}

uint64_t
xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)
{
	xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);

	/*
	 * Need to #define the indices, or search through the array.
	 */
	return (xcp->mc_msrvalues[0].value);
}

int
xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
    boolean_t uvaddr)
{
	long rc;
	uint_t i;

	ASSERT(cmd == GNTTABOP_map_grant_ref);

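	/*
	 * For mappings at kernel addresses, tag the resulting PTEs as
	 * foreign via the grant-map flags' guest-available bits so the
	 * VM layer can recognize pages owned by another domain.
	 */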
#if !defined(_BOOT)
	if (uvaddr == B_FALSE) {
		for (i = 0; i < count; ++i) {
			mapop[i].flags |= (PT_FOREIGN << _GNTMAP_guest_avail0);
		}
	}
#endif

	rc = HYPERVISOR_grant_table_op(cmd, mapop, count);

	return (rc);
}

static int
xpv_get_physinfo(xen_sysctl_physinfo_t *pi)
{
	xen_sysctl_t op;
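	/*
	 * Alias the cpu_to_node guest handle as a plain pointer so it
	 * can be cleared below without depending on the handle's
	 * representation.
	 */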
	struct sp { void *p; } *sp = (struct sp *)&op.u.physinfo.cpu_to_node;
	int ret;

	bzero(&op, sizeof (op));
	op.cmd = XEN_SYSCTL_physinfo;
	op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
	/*LINTED: constant in conditional context*/
	set_xen_guest_handle(*sp, NULL);

	ret = HYPERVISOR_sysctl(&op);

	if (ret != 0)
		return (xen_xlate_errcode(ret));

	bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo));
	return (0);
}

/*
 * On dom0, we can determine the number of physical cpus on the machine.
 * This number is important when figuring out what workarounds are
 * appropriate, so compute it now.
 */
uint_t
xpv_nr_phys_cpus(void)
{
	static uint_t nphyscpus = 0;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if (nphyscpus == 0) {
		xen_sysctl_physinfo_t pi;
		int ret;

		if ((ret = xpv_get_physinfo(&pi)) != 0)
			panic("xpv_get_physinfo() failed: %d\n", ret);
		nphyscpus = pi.nr_cpus;
	}
	return (nphyscpus);
}

pgcnt_t
xpv_nr_phys_pages(void)
{
	xen_sysctl_physinfo_t pi;
	int ret;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if ((ret = xpv_get_physinfo(&pi)) != 0)
		panic("xpv_get_physinfo() failed: %d\n", ret);

	return ((pgcnt_t)pi.total_pages);
}

uint64_t
xpv_cpu_khz(void)
{
	xen_sysctl_physinfo_t pi;
	int ret;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if ((ret = xpv_get_physinfo(&pi)) != 0)
		panic("xpv_get_physinfo() failed: %d\n", ret);
	return ((uint64_t)pi.cpu_khz);
}