xref: /titanic_52/usr/src/uts/i86xpv/os/xen_machdep.c (revision cd11837edb943ce20ca539d505e60b469f89bf20)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* derived from netbsd's xen_machdep.c 1.1.2.1 */
28 
29 /*
30  *
31  * Copyright (c) 2004 Christian Limpach.
32  * All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  * 3. This section intentionally left blank.
43  * 4. The name of the author may not be used to endorse or promote products
44  *    derived from this software without specific prior written permission.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
47  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
48  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
49  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
50  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
52  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
54  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
55  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56  */
57 /*
58  * Section 3 of the above license was updated in response to bug 6379571.
59  */
60 
61 #include <sys/ctype.h>
62 #include <sys/types.h>
63 #include <sys/cmn_err.h>
64 #include <sys/trap.h>
65 #include <sys/segments.h>
66 #include <sys/hypervisor.h>
67 #include <sys/xen_mmu.h>
68 #include <sys/machsystm.h>
69 #include <sys/promif.h>
70 #include <sys/bootconf.h>
71 #include <sys/bootinfo.h>
72 #include <sys/cpr.h>
73 #include <sys/taskq.h>
74 #include <sys/uadmin.h>
75 #include <sys/evtchn_impl.h>
76 #include <sys/archsystm.h>
77 #include <xen/sys/xenbus_impl.h>
78 #include <sys/mach_mmu.h>
79 #include <vm/hat_i86.h>
80 #include <sys/gnttab.h>
81 #include <sys/reboot.h>
82 #include <sys/stack.h>
83 #include <sys/clock.h>
84 #include <sys/bitmap.h>
85 #include <sys/processor.h>
86 #include <sys/xen_errno.h>
87 #include <sys/xpv_panic.h>
88 #include <sys/smp_impldefs.h>
89 #include <sys/cpu.h>
90 #include <sys/balloon_impl.h>
91 #include <sys/ddi.h>
92 
/*
 * SUSPEND_DEBUG(fmt, ...): trace suspend/resume progress through
 * xen_printf().
 *
 * On DEBUG kernels the message is emitted only while xen_suspend_debug is
 * non-zero; on non-DEBUG kernels the macro expands to nothing and its
 * arguments are not evaluated.  The DEBUG form is wrapped in
 * do { } while (0) so that it is always exactly one statement and can
 * never capture an "else" that follows it.
 */
#ifdef DEBUG
#define	SUSPEND_DEBUG(...)					\
	do {							\
		if (xen_suspend_debug)				\
			xen_printf(__VA_ARGS__);		\
	} while (0)
#else
#define	SUSPEND_DEBUG(...)
#endif
98 
/*
 * Not referenced in the visible part of this file; presumably consumed by
 * the common checkpoint/resume code -- TODO confirm.
 */
int cpr_debug;

/*
 * VCPUs that xen_suspend_domain() found in the P_POWEROFF state.  Members
 * of this set are skipped by the xen_vcpu_down()/xen_vcpu_up() calls in
 * suspend_cpus() and resume_cpus().
 */
cpuset_t cpu_suspend_lost_set;

/*
 * Non-zero enables SUSPEND_DEBUG() output on DEBUG kernels.  A value of
 * exactly 1 also makes xen_hypervisor_supports_solaris() accept any
 * hypervisor version.
 */
static int xen_suspend_debug;

/*
 * Physical CPU table filled in by startup_xen_mca(): the number of entries
 * and the array itself.
 */
uint_t xen_phys_ncpus;
xen_mc_logical_cpu_t *xen_phys_cpus;

/* Non-zero makes startup_xen_mca() print the physical CPU table. */
int xen_physinfo_debug = 0;
106 
107 /*
108  * Determine helpful version information.
109  *
110  * (And leave copies in the data segment so we can look at them later
111  * with e.g. kmdb.)
112  */
113 
114 typedef enum xen_version {
115 	XENVER_BOOT_IDX,
116 	XENVER_CURRENT_IDX
117 } xen_version_t;
118 
119 struct xenver {
120 	ulong_t xv_major;
121 	ulong_t xv_minor;
122 	ulong_t xv_revision;
123 	xen_extraversion_t xv_ver;
124 	ulong_t xv_is_xvm;
125 	xen_changeset_info_t xv_chgset;
126 	xen_compile_info_t xv_build;
127 	xen_capabilities_info_t xv_caps;
128 } xenver[2];
129 
130 #define	XENVER_BOOT(m)	(xenver[XENVER_BOOT_IDX].m)
131 #define	XENVER_CURRENT(m)	(xenver[XENVER_CURRENT_IDX].m)
132 
133 /*
134  * Update the xenver data. We maintain two copies, boot and
135  * current. If we are setting the boot, then also set current.
136  */
137 static void
138 xen_set_version(xen_version_t idx)
139 {
140 	ulong_t ver;
141 
142 	bzero(&xenver[idx], sizeof (xenver[idx]));
143 
144 	ver = HYPERVISOR_xen_version(XENVER_version, 0);
145 
146 	xenver[idx].xv_major = BITX(ver, 31, 16);
147 	xenver[idx].xv_minor = BITX(ver, 15, 0);
148 
149 	(void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver);
150 
151 	/*
152 	 * The revision is buried in the extraversion information that is
153 	 * maintained by the hypervisor. For our purposes we expect that
154 	 * the revision number is:
155 	 * 	- the second character in the extraversion information
156 	 *	- one character long
157 	 *	- numeric digit
158 	 * If it isn't then we can't extract the revision and we leave it
159 	 * set to 0.
160 	 */
161 	if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1]))
162 		xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0';
163 	else
164 		cmn_err(CE_WARN, "Cannot extract revision on this hypervisor "
165 		    "version: v%s, unexpected version format",
166 		    xenver[idx].xv_ver);
167 
168 	xenver[idx].xv_is_xvm = 0;
169 
170 	if (strlen(xenver[idx].xv_ver) >= 4 &&
171 	    strncmp(xenver[idx].xv_ver + strlen(xenver[idx].xv_ver) - 4,
172 	    "-xvm", 4) == 0)
173 		xenver[idx].xv_is_xvm = 1;
174 
175 	(void) HYPERVISOR_xen_version(XENVER_changeset,
176 	    &xenver[idx].xv_chgset);
177 
178 	(void) HYPERVISOR_xen_version(XENVER_compile_info,
179 	    &xenver[idx].xv_build);
180 	/*
181 	 * Capabilities are a set of space separated ascii strings
182 	 * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'
183 	 */
184 	(void) HYPERVISOR_xen_version(XENVER_capabilities,
185 	    &xenver[idx].xv_caps);
186 
187 	cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major,
188 	    xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset);
189 
190 	if (idx == XENVER_BOOT_IDX)
191 		bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX],
192 		    sizeof (xenver[XENVER_BOOT_IDX]));
193 }
194 
/*
 * The question put to xen_hypervisor_supports_solaris(): can we merely
 * run on this hypervisor, or can we also suspend/resume on it?
 */
typedef enum xen_hypervisor_check {
	XEN_RUN_CHECK = 0,
	XEN_SUSPEND_CHECK = 1
} xen_hypervisor_check_t;
199 
200 /*
201  * To run the hypervisor must be 3.0.4 or better. To suspend/resume
202  * we need 3.0.4 or better and if it is 3.0.4. then it must be provided
203  * by the Solaris xVM project.
204  * Checking can be disabled for testing purposes by setting the
205  * xen_suspend_debug variable.
206  */
207 static int
208 xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)
209 {
210 	if (xen_suspend_debug == 1)
211 		return (1);
212 	if (XENVER_CURRENT(xv_major) < 3)
213 		return (0);
214 	if (XENVER_CURRENT(xv_major) > 3)
215 		return (1);
216 	if (XENVER_CURRENT(xv_minor) > 0)
217 		return (1);
218 	if (XENVER_CURRENT(xv_revision) < 4)
219 		return (0);
220 	if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 &&
221 	    !XENVER_CURRENT(xv_is_xvm))
222 		return (0);
223 
224 	return (1);
225 }
226 
227 /*
228  * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
229  * workaround.
230  */
231 static void
232 xen_pte_workaround(void)
233 {
234 #if defined(__amd64)
235 	extern int pt_kern;
236 
237 	if (XENVER_CURRENT(xv_major) != 3)
238 		return;
239 	if (XENVER_CURRENT(xv_minor) > 1)
240 		return;
241 	if (XENVER_CURRENT(xv_minor) == 1 &&
242 	    XENVER_CURRENT(xv_revision) > 1)
243 		return;
244 	if (XENVER_CURRENT(xv_is_xvm))
245 		return;
246 
247 	pt_kern = PT_USER;
248 #endif
249 }
250 
251 void
252 xen_set_callback(void (*func)(void), uint_t type, uint_t flags)
253 {
254 	struct callback_register cb;
255 
256 	bzero(&cb, sizeof (cb));
257 #if defined(__amd64)
258 	cb.address = (ulong_t)func;
259 #elif defined(__i386)
260 	cb.address.cs = KCS_SEL;
261 	cb.address.eip = (ulong_t)func;
262 #endif
263 	cb.type = type;
264 	cb.flags = flags;
265 
266 	/*
267 	 * XXPV always ignore return value for NMI
268 	 */
269 	if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 &&
270 	    type != CALLBACKTYPE_nmi)
271 		panic("HYPERVISOR_callback_op failed");
272 }
273 
274 void
275 xen_init_callbacks(void)
276 {
277 	/*
278 	 * register event (interrupt) handler.
279 	 */
280 	xen_set_callback(xen_callback, CALLBACKTYPE_event, 0);
281 
282 	/*
283 	 * failsafe handler.
284 	 */
285 	xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe,
286 	    CALLBACKF_mask_events);
287 
288 	/*
289 	 * NMI handler.
290 	 */
291 	xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0);
292 
293 	/*
294 	 * system call handler
295 	 * XXPV move to init_cpu_syscall?
296 	 */
297 #if defined(__amd64)
298 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
299 	    CALLBACKF_mask_events);
300 #endif	/* __amd64 */
301 }
302 
303 
304 /*
305  * cmn_err() followed by a 1/4 second delay; this gives the
306  * logging service a chance to flush messages and helps avoid
307  * intermixing output from prom_printf().
308  * XXPV: doesn't exactly help us on UP though.
309  */
310 /*PRINTFLIKE2*/
311 void
312 cpr_err(int ce, const char *fmt, ...)
313 {
314 	va_list adx;
315 
316 	va_start(adx, fmt);
317 	vcmn_err(ce, fmt, adx);
318 	va_end(adx);
319 	drv_usecwait(MICROSEC >> 2);
320 }
321 
/*
 * Suspend every device below the root of the device tree.  A failure is
 * fatal: we panic with the error returned by cpr_suspend_devices().
 */
void
xen_suspend_devices(void)
{
	int err;

	SUSPEND_DEBUG("xen_suspend_devices\n");

	err = cpr_suspend_devices(ddi_root_node());
	if (err != 0)
		panic("failed to suspend devices: %d", err);
}
332 
/*
 * Resume every device below the root of the device tree.  A failure is
 * fatal: we panic with the error returned by cpr_resume_devices().
 */
void
xen_resume_devices(void)
{
	int err;

	SUSPEND_DEBUG("xen_resume_devices\n");

	err = cpr_resume_devices(ddi_root_node(), 0);
	if (err != 0)
		panic("failed to resume devices: %d", err);
}
343 
344 /*
345  * The list of mfn pages is out of date.  Recompute it.
346  */
347 static void
348 rebuild_mfn_list(void)
349 {
350 	int i = 0;
351 	size_t sz;
352 	size_t off;
353 	pfn_t pfn;
354 
355 	SUSPEND_DEBUG("rebuild_mfn_list\n");
356 
357 	sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;
358 
359 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
360 		size_t j = mmu_btop(off);
361 		if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
362 			pfn = hat_getpfnum(kas.a_hat,
363 			    (caddr_t)&mfn_list_pages[j]);
364 			mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
365 		}
366 
367 		pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
368 		mfn_list_pages[j] = pfn_to_mfn(pfn);
369 	}
370 
371 	pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
372 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
373 	    = pfn_to_mfn(pfn);
374 }
375 
376 static void
377 suspend_cpus(void)
378 {
379 	int i;
380 
381 	SUSPEND_DEBUG("suspend_cpus\n");
382 
383 	mp_enter_barrier();
384 
385 	for (i = 1; i < ncpus; i++) {
386 		if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
387 			SUSPEND_DEBUG("xen_vcpu_down %d\n", i);
388 			(void) xen_vcpu_down(i);
389 		}
390 
391 		mach_cpucontext_reset(cpu[i]);
392 	}
393 }
394 
395 static void
396 resume_cpus(void)
397 {
398 	int i;
399 
400 	for (i = 1; i < ncpus; i++) {
401 		if (cpu[i] == NULL)
402 			continue;
403 
404 		if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
405 			SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
406 			mach_cpucontext_restore(cpu[i]);
407 			(void) xen_vcpu_up(i);
408 		}
409 	}
410 
411 	mp_leave_barrier();
412 }
413 
414 /*
415  * Top level routine to direct suspend/resume of a domain.
416  */
417 void
418 xen_suspend_domain(void)
419 {
420 	extern void rtcsync(void);
421 	extern hrtime_t hres_last_tick;
422 	mfn_t start_info_mfn;
423 	ulong_t flags;
424 	pfn_t pfn;
425 	int i;
426 
427 	/*
428 	 * Check that we are happy to suspend on this hypervisor.
429 	 */
430 	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
431 		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
432 		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
433 		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
434 		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
435 		return;
436 	}
437 
438 	/*
439 	 * XXPV - Are we definitely OK to suspend by the time we've connected
440 	 * the handler?
441 	 */
442 
443 	cpr_err(CE_NOTE, "Domain suspending for save/migrate");
444 
445 	SUSPEND_DEBUG("xen_suspend_domain\n");
446 
447 	/*
448 	 * suspend interrupts and devices
449 	 * XXPV - we use suspend/resume for both save/restore domains (like sun
450 	 * cpr) and for migration.  Would be nice to know the difference if
451 	 * possible.  For save/restore where down time may be a long time, we
452 	 * may want to do more of the things that cpr does.  (i.e. notify user
453 	 * processes, shrink memory footprint for faster restore, etc.)
454 	 */
455 	xen_suspend_devices();
456 	SUSPEND_DEBUG("xenbus_suspend\n");
457 	xenbus_suspend();
458 
459 	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
460 	start_info_mfn = pfn_to_mfn(pfn);
461 
462 	/*
463 	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
464 	 * wrt xenbus being suspended here?
465 	 */
466 	mutex_enter(&cpu_lock);
467 
468 	/*
469 	 * Suspend must be done on vcpu 0, as no context for other CPUs is
470 	 * saved.
471 	 *
472 	 * XXPV - add to taskq API ?
473 	 */
474 	thread_affinity_set(curthread, 0);
475 	kpreempt_disable();
476 
477 	SUSPEND_DEBUG("xen_start_migrate\n");
478 	xen_start_migrate();
479 	if (ncpus > 1)
480 		suspend_cpus();
481 
482 	/*
483 	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
484 	 * any holder would have dropped it to get through suspend_cpus().
485 	 */
486 	mutex_enter(&ec_lock);
487 
488 	/*
489 	 * From here on in, we can't take locks.
490 	 */
491 	SUSPEND_DEBUG("ec_suspend\n");
492 	ec_suspend();
493 	SUSPEND_DEBUG("gnttab_suspend\n");
494 	gnttab_suspend();
495 
496 	flags = intr_clear();
497 
498 	xpv_time_suspend();
499 
500 	/*
501 	 * Currently, the hypervisor incorrectly fails to bring back
502 	 * powered-down VCPUs.  Thus we need to record any powered-down VCPUs
503 	 * to prevent any attempts to operate on them.  But we have to do this
504 	 * *after* the very first time we do ec_suspend().
505 	 */
506 	for (i = 1; i < ncpus; i++) {
507 		if (cpu[i] == NULL)
508 			continue;
509 
510 		if (cpu_get_state(cpu[i]) == P_POWEROFF)
511 			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
512 	}
513 
514 	/*
515 	 * The dom0 save/migrate code doesn't automatically translate
516 	 * these into PFNs, but expects them to be, so we do it here.
517 	 * We don't use mfn_to_pfn() because so many OS services have
518 	 * been disabled at this point.
519 	 */
520 	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
521 	xen_info->console.domU.mfn =
522 	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];
523 
524 	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
525 		prom_printf("xen_suspend_domain(): "
526 		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
527 		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
528 	}
529 
530 	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
531 	    0, UVMF_INVLPG)) {
532 		prom_printf("xen_suspend_domain(): "
533 		    "HYPERVISOR_update_va_mapping() failed\n");
534 		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
535 	}
536 
537 	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
538 
539 	/*
540 	 * At this point we suspend and sometime later resume.
541 	 */
542 	if (HYPERVISOR_suspend(start_info_mfn)) {
543 		prom_printf("xen_suspend_domain(): "
544 		    "HYPERVISOR_suspend() failed\n");
545 		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
546 	}
547 
548 	/*
549 	 * Point HYPERVISOR_shared_info to its new value.
550 	 */
551 	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
552 	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
553 	    UVMF_INVLPG))
554 		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
555 
556 	if (xen_info->nr_pages != mfn_count) {
557 		prom_printf("xen_suspend_domain(): number of pages"
558 		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
559 		    xen_info->nr_pages);
560 		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
561 	}
562 
563 	xpv_time_resume();
564 
565 	cached_max_mfn = 0;
566 
567 	SUSPEND_DEBUG("gnttab_resume\n");
568 	gnttab_resume();
569 
570 	/* XXPV: add a note that this must be lockless. */
571 	SUSPEND_DEBUG("ec_resume\n");
572 	ec_resume();
573 
574 	intr_restore(flags);
575 
576 	if (ncpus > 1)
577 		resume_cpus();
578 
579 	mutex_exit(&ec_lock);
580 	xen_end_migrate();
581 	mutex_exit(&cpu_lock);
582 
583 	/*
584 	 * Now we can take locks again.
585 	 */
586 
587 	/*
588 	 * Force the tick value used for tv_nsec in hres_tick() to be up to
589 	 * date. rtcsync() will reset the hrestime value appropriately.
590 	 */
591 	hres_last_tick = xpv_gethrtime();
592 
593 	/*
594 	 * XXPV: we need to have resumed the CPUs since this takes locks, but
595 	 * can remote CPUs see bad state? Presumably yes. Should probably nest
596 	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
597 	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
598 	 * and re-calibrate if we migrated to a different speed cpu.  Also need
599 	 * to make a (re)init_cpu_info call to update processor info structs
600 	 * and device tree info.  That remains to be written at the moment.
601 	 */
602 	rtcsync();
603 
604 	rebuild_mfn_list();
605 
606 	SUSPEND_DEBUG("xenbus_resume\n");
607 	xenbus_resume();
608 	SUSPEND_DEBUG("xenbus_resume_devices\n");
609 	xen_resume_devices();
610 
611 	thread_affinity_clear(curthread);
612 	kpreempt_enable();
613 
614 	SUSPEND_DEBUG("finished xen_suspend_domain\n");
615 
616 	/*
617 	 * We have restarted our suspended domain, update the hypervisor
618 	 * details. NB: This must be done at the end of this function,
619 	 * since we need the domain to be completely resumed before
620 	 * these functions will work correctly.
621 	 */
622 	xen_set_version(XENVER_CURRENT_IDX);
623 
624 	/*
625 	 * We can check and report a warning, but we don't stop the
626 	 * process.
627 	 */
628 	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
629 		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
630 		    "but need at least version v3.0.4",
631 		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
632 		    XENVER_CURRENT(xv_ver));
633 
634 	cmn_err(CE_NOTE, "domain restore/migrate completed");
635 }
636 
637 /*ARGSUSED*/
638 int
639 xen_debug_handler(void *arg)
640 {
641 	debug_enter("External debug event received");
642 
643 	/*
644 	 * If we've not got KMDB loaded, output some stuff difficult to capture
645 	 * from a domain core.
646 	 */
647 	if (!(boothowto & RB_DEBUG)) {
648 		shared_info_t *si = HYPERVISOR_shared_info;
649 		int i;
650 
651 		prom_printf("evtchn_pending [ ");
652 		for (i = 0; i < 8; i++)
653 			prom_printf("%lx ", si->evtchn_pending[i]);
654 		prom_printf("]\nevtchn_mask [ ");
655 		for (i = 0; i < 8; i++)
656 			prom_printf("%lx ", si->evtchn_mask[i]);
657 		prom_printf("]\n");
658 
659 		for (i = 0; i < ncpus; i++) {
660 			vcpu_info_t *vcpu = &si->vcpu_info[i];
661 			if (cpu[i] == NULL)
662 				continue;
663 			prom_printf("CPU%d pending %d mask %d sel %lx\n",
664 			    i, vcpu->evtchn_upcall_pending,
665 			    vcpu->evtchn_upcall_mask,
666 			    vcpu->evtchn_pending_sel);
667 		}
668 	}
669 
670 	return (0);
671 }
672 
673 /*ARGSUSED*/
674 static void
675 xen_sysrq_handler(struct xenbus_watch *watch, const char **vec,
676     unsigned int len)
677 {
678 	xenbus_transaction_t xbt;
679 	char key = '\0';
680 	int ret;
681 
682 retry:
683 	if (xenbus_transaction_start(&xbt)) {
684 		cmn_err(CE_WARN, "failed to start sysrq transaction");
685 		return;
686 	}
687 
688 	if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) {
689 		/*
690 		 * ENOENT happens in response to our own xenbus_rm.
691 		 * XXPV - this happens spuriously on boot?
692 		 */
693 		if (ret != ENOENT)
694 			cmn_err(CE_WARN, "failed to read sysrq: %d", ret);
695 		goto out;
696 	}
697 
698 	if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) {
699 		cmn_err(CE_WARN, "failed to reset sysrq: %d", ret);
700 		goto out;
701 	}
702 
703 	if (xenbus_transaction_end(xbt, 0) == EAGAIN)
704 		goto retry;
705 
706 	/*
707 	 * Somewhat arbitrary - on Linux this means 'reboot'. We could just
708 	 * accept any key, but this might increase the risk of sending a
709 	 * harmless sysrq to the wrong domain...
710 	 */
711 	if (key == 'b')
712 		(void) xen_debug_handler(NULL);
713 	else
714 		cmn_err(CE_WARN, "Ignored sysrq %c", key);
715 	return;
716 
717 out:
718 	(void) xenbus_transaction_end(xbt, 1);
719 }
720 
/*
 * Task queue on which xen_shutdown_handler() dispatches xen_shutdown().
 * Created by xen_late_startup(), and only on domains that are not the
 * initial domain.
 */
taskq_t *xen_shutdown_tq;
722 
/*
 * Codes for the requests that can arrive through "control/shutdown".
 * SHUTDOWN_INVALID marks "no recognized request"; SHUTDOWN_MAX is one
 * past the last valid code.  SHUTDOWN_INVALID is parenthesized so that
 * the negative value stays intact wherever the macro is expanded.
 */
#define	SHUTDOWN_INVALID	(-1)
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

/*
 * Seconds xen_shutdown() allows before xen_dirty_shutdown() is run.
 */
#define	SHUTDOWN_TIMEOUT_SECS (60 * 5)

/*
 * Request name for each shutdown code.  Designated initializers tie every
 * string to its code, so the table cannot silently fall out of step if
 * the codes are ever renumbered.
 */
static const char *cmd_strings[SHUTDOWN_MAX] = {
	[SHUTDOWN_POWEROFF]	= "poweroff",
	[SHUTDOWN_REBOOT]	= "reboot",
	[SHUTDOWN_SUSPEND]	= "suspend",
	[SHUTDOWN_HALT]		= "halt"
};
738 
739 static void
740 xen_dirty_shutdown(void *arg)
741 {
742 	int cmd = (uintptr_t)arg;
743 
744 	cmn_err(CE_WARN, "Externally requested shutdown failed or "
745 	    "timed out.\nShutting down.\n");
746 
747 	switch (cmd) {
748 	case SHUTDOWN_HALT:
749 	case SHUTDOWN_POWEROFF:
750 		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
751 		break;
752 	case SHUTDOWN_REBOOT:
753 		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
754 		break;
755 	}
756 }
757 
758 static void
759 xen_shutdown(void *arg)
760 {
761 	int cmd = (uintptr_t)arg;
762 	proc_t *initpp;
763 
764 	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
765 
766 	if (cmd == SHUTDOWN_SUSPEND) {
767 		xen_suspend_domain();
768 		return;
769 	}
770 
771 	switch (cmd) {
772 	case SHUTDOWN_POWEROFF:
773 		force_shutdown_method = AD_POWEROFF;
774 		break;
775 	case SHUTDOWN_HALT:
776 		force_shutdown_method = AD_HALT;
777 		break;
778 	case SHUTDOWN_REBOOT:
779 		force_shutdown_method = AD_BOOT;
780 		break;
781 	}
782 
783 	/*
784 	 * If we're still booting and init(1) isn't set up yet, simply halt.
785 	 */
786 	mutex_enter(&pidlock);
787 	initpp = prfind(P_INITPID);
788 	mutex_exit(&pidlock);
789 	if (initpp == NULL) {
790 		extern void halt(char *);
791 		halt("Power off the System");   /* just in case */
792 	}
793 
794 	/*
795 	 * else, graceful shutdown with inittab and all getting involved
796 	 */
797 	psignal(initpp, SIGPWR);
798 
799 	(void) timeout(xen_dirty_shutdown, arg,
800 	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
801 }
802 
803 /*ARGSUSED*/
804 static void
805 xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
806 	unsigned int len)
807 {
808 	char *str;
809 	xenbus_transaction_t xbt;
810 	int err, shutdown_code = SHUTDOWN_INVALID;
811 	unsigned int slen;
812 
813 again:
814 	err = xenbus_transaction_start(&xbt);
815 	if (err)
816 		return;
817 	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
818 		(void) xenbus_transaction_end(xbt, 1);
819 		return;
820 	}
821 
822 	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
823 
824 	/*
825 	 * If this is a watch fired from our write below, check out early to
826 	 * avoid an infinite loop.
827 	 */
828 	if (strcmp(str, "") == 0) {
829 		(void) xenbus_transaction_end(xbt, 0);
830 		kmem_free(str, slen);
831 		return;
832 	} else if (strcmp(str, "poweroff") == 0) {
833 		shutdown_code = SHUTDOWN_POWEROFF;
834 	} else if (strcmp(str, "reboot") == 0) {
835 		shutdown_code = SHUTDOWN_REBOOT;
836 	} else if (strcmp(str, "suspend") == 0) {
837 		shutdown_code = SHUTDOWN_SUSPEND;
838 	} else if (strcmp(str, "halt") == 0) {
839 		shutdown_code = SHUTDOWN_HALT;
840 	} else {
841 		printf("Ignoring shutdown request: %s\n", str);
842 	}
843 
844 	/*
845 	 * XXPV	Should we check the value of xenbus_write() too, or are all
846 	 *	errors automatically folded into xenbus_transaction_end() ??
847 	 */
848 	(void) xenbus_write(xbt, "control", "shutdown", "");
849 	err = xenbus_transaction_end(xbt, 0);
850 	if (err == EAGAIN) {
851 		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
852 		kmem_free(str, slen);
853 		goto again;
854 	}
855 
856 	kmem_free(str, slen);
857 	if (shutdown_code != SHUTDOWN_INVALID) {
858 		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
859 		    (void *)(intptr_t)shutdown_code, 0);
860 	}
861 }
862 
863 static struct xenbus_watch shutdown_watch;
864 static struct xenbus_watch sysrq_watch;
865 
866 void
867 xen_late_startup(void)
868 {
869 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
870 		xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
871 		    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
872 		shutdown_watch.node = "control/shutdown";
873 		shutdown_watch.callback = xen_shutdown_handler;
874 		if (register_xenbus_watch(&shutdown_watch))
875 			cmn_err(CE_WARN, "Failed to set shutdown watcher");
876 
877 		sysrq_watch.node = "control/sysrq";
878 		sysrq_watch.callback = xen_sysrq_handler;
879 		if (register_xenbus_watch(&sysrq_watch))
880 			cmn_err(CE_WARN, "Failed to set sysrq watcher");
881 	}
882 	balloon_init(xen_info->nr_pages);
883 }
884 
#ifdef DEBUG
#define	XEN_PRINTF_BUFSIZE	1024

/* Formatting buffer for xen_printf(); shared by all callers. */
char xen_printf_buffer[XEN_PRINTF_BUFSIZE];

/*
 * printf() that writes straight to the hypervisor console.  For DomU it
 * only produces output when the xen hypervisor itself was built with
 * debug on.  It can be used at any time because no I/O ring interaction
 * is needed.  Output longer than the buffer is truncated.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
	va_list	args;
	size_t	msglen;

	va_start(args, fmt);
	(void) vsnprintf(xen_printf_buffer, sizeof (xen_printf_buffer),
	    fmt, args);
	va_end(args);

	msglen = strlen(xen_printf_buffer);
	(void) HYPERVISOR_console_io(CONSOLEIO_write, msglen,
	    xen_printf_buffer);
}
#else
/*
 * Non-DEBUG kernels keep the symbol but print nothing.
 */
void
xen_printf(const char *fmt, ...)
{
}
#endif	/* DEBUG */
914 
915 void
916 startup_xen_version(void)
917 {
918 	xen_set_version(XENVER_BOOT_IDX);
919 	if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
920 		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
921 		    "but need at least version v3.0.4",
922 		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
923 		    XENVER_CURRENT(xv_ver));
924 	xen_pte_workaround();
925 }
926 
927 int xen_mca_simulate_mc_physinfo_failure = 0;
928 
929 void
930 startup_xen_mca(void)
931 {
932 	if (!DOMAIN_IS_INITDOMAIN(xen_info))
933 		return;
934 
935 	xen_phys_ncpus = 0;
936 	xen_phys_cpus = NULL;
937 
938 	if (xen_mca_simulate_mc_physinfo_failure ||
939 	    xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) {
940 		cmn_err(CE_WARN,
941 		    "%sxen_get_mc_physinfo failure during xen MCA startup: "
942 		    "there will be no machine check support",
943 		    xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : "");
944 		return;
945 	}
946 
947 	xen_phys_cpus = kmem_alloc(xen_phys_ncpus *
948 	    sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);
949 
950 	if (xen_phys_cpus == NULL) {
951 		cmn_err(CE_WARN,
952 		    "xen_get_physinfo failure: can't allocate CPU array");
953 		return;
954 	}
955 
956 	if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) {
957 		cmn_err(CE_WARN, "xen_get_mc_physinfo failure: no "
958 		    "physical CPU info");
959 		kmem_free(xen_phys_cpus,
960 		    xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
961 		xen_phys_ncpus = 0;
962 		xen_phys_cpus = NULL;
963 	}
964 
965 	if (xen_physinfo_debug) {
966 		xen_mc_logical_cpu_t *xcp;
967 		unsigned i;
968 
969 		cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n",
970 		    xen_phys_ncpus);
971 		for (i = 0; i < xen_phys_ncpus; i++) {
972 			xcp = &xen_phys_cpus[i];
973 			cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u",
974 			    xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid,
975 			    xcp->mc_threadid, xcp->mc_apicid);
976 		}
977 	}
978 }
979 
980 /*
981  * Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
982  */
983 
984 void
985 xen_set_gdt(ulong_t *frame_list, int entries)
986 {
987 	int err;
988 	if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) {
989 		/*
990 		 * X_EINVAL:	reserved entry or bad frames
991 		 * X_EFAULT:	bad address
992 		 */
993 		panic("xen_set_gdt(%p, %d): error %d",
994 		    (void *)frame_list, entries, -(int)err);
995 	}
996 }
997 
998 void
999 xen_set_ldt(user_desc_t *ldt, uint_t nsels)
1000 {
1001 	struct mmuext_op	op;
1002 	long			err;
1003 
1004 	op.cmd = MMUEXT_SET_LDT;
1005 	op.arg1.linear_addr = (uintptr_t)ldt;
1006 	op.arg2.nr_ents = nsels;
1007 
1008 	if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) {
1009 		panic("xen_set_ldt(%p, %d): error %d",
1010 		    (void *)ldt, nsels, -(int)err);
1011 	}
1012 }
1013 
1014 void
1015 xen_stack_switch(ulong_t ss, ulong_t esp)
1016 {
1017 	long err;
1018 
1019 	if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) {
1020 		/*
1021 		 * X_EPERM:	bad selector
1022 		 */
1023 		panic("xen_stack_switch(%lx, %lx): error %d", ss, esp,
1024 		    -(int)err);
1025 	}
1026 }
1027 
1028 long
1029 xen_set_trap_table(trap_info_t *table)
1030 {
1031 	long err;
1032 
1033 	if ((err = HYPERVISOR_set_trap_table(table)) != 0) {
1034 		/*
1035 		 * X_EFAULT:	bad address
1036 		 * X_EPERM:	bad selector
1037 		 */
1038 		panic("xen_set_trap_table(%p): error %d", (void *)table,
1039 		    -(int)err);
1040 	}
1041 	return (err);
1042 }
1043 
1044 #if defined(__amd64)
1045 void
1046 xen_set_segment_base(int reg, ulong_t value)
1047 {
1048 	long err;
1049 
1050 	if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) {
1051 		/*
1052 		 * X_EFAULT:	bad address
1053 		 * X_EINVAL:	bad type
1054 		 */
1055 		panic("xen_set_segment_base(%d, %lx): error %d",
1056 		    reg, value, -(int)err);
1057 	}
1058 }
1059 #endif	/* __amd64 */
1060 
1061 /*
1062  * Translate a hypervisor errcode to a Solaris error code.
1063  */
1064 int
1065 xen_xlate_errcode(int error)
1066 {
1067 	switch (-error) {
1068 
1069 	/*
1070 	 * Translate hypervisor errno's into native errno's
1071 	 */
1072 
1073 #define	CASE(num)	case X_##num: error = num; break
1074 
1075 	CASE(EPERM);	CASE(ENOENT);	CASE(ESRCH);
1076 	CASE(EINTR);	CASE(EIO);	CASE(ENXIO);
1077 	CASE(E2BIG);	CASE(ENOMEM);	CASE(EACCES);
1078 	CASE(EFAULT);	CASE(EBUSY);	CASE(EEXIST);
1079 	CASE(ENODEV);	CASE(EISDIR);	CASE(EINVAL);
1080 	CASE(ENOSPC);	CASE(ESPIPE);	CASE(EROFS);
1081 	CASE(ENOSYS);	CASE(ENOTEMPTY); CASE(EISCONN);
1082 	CASE(ENODATA);
1083 
1084 #undef CASE
1085 
1086 	default:
1087 		panic("xen_xlate_errcode: unknown error %d", error);
1088 	}
1089 
1090 	return (error);
1091 }
1092 
1093 /*
1094  * Raise PS_IOPL on current vcpu to user level.
1095  * Caller responsible for preventing kernel preemption.
1096  */
1097 void
1098 xen_enable_user_iopl(void)
1099 {
1100 	physdev_set_iopl_t set_iopl;
1101 	set_iopl.iopl = 3;		/* user ring 3 */
1102 	(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1103 }
1104 
1105 /*
1106  * Drop PS_IOPL on current vcpu to kernel level
1107  */
1108 void
1109 xen_disable_user_iopl(void)
1110 {
1111 	physdev_set_iopl_t set_iopl;
1112 	set_iopl.iopl = 1;		/* kernel pseudo ring 1 */
1113 	(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1114 }
1115 
1116 int
1117 xen_gdt_setprot(cpu_t *cp, uint_t prot)
1118 {
1119 	int err;
1120 #if defined(__amd64)
1121 	int pt_bits = PT_VALID;
1122 	if (prot & PROT_WRITE)
1123 		pt_bits |= PT_WRITABLE;
1124 #endif
1125 
1126 	if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt,
1127 	    MMU_PAGESIZE, prot)) != 0)
1128 		goto done;
1129 
1130 #if defined(__amd64)
1131 	err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);
1132 #endif
1133 
1134 done:
1135 	if (err) {
1136 		cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
1137 		    cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
1138 		    err);
1139 	}
1140 
1141 	return (err);
1142 }
1143 
1144 int
1145 xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
1146 {
1147 	int err;
1148 	caddr_t	lva = (caddr_t)ldt;
1149 #if defined(__amd64)
1150 	int pt_bits = PT_VALID;
1151 	pgcnt_t npgs;
1152 	if (prot & PROT_WRITE)
1153 		pt_bits |= PT_WRITABLE;
1154 #endif	/* __amd64 */
1155 
1156 	if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
1157 		goto done;
1158 
1159 #if defined(__amd64)
1160 
1161 	ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
1162 	npgs = mmu_btop(lsize);
1163 	while (npgs--) {
1164 		if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
1165 		    pt_bits)) != 0)
1166 			break;
1167 		lva += PAGESIZE;
1168 	}
1169 #endif	/* __amd64 */
1170 
1171 done:
1172 	if (err) {
1173 		cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
1174 		    (void *)lva,
1175 		    (prot & PROT_WRITE) ? "writable" : "read-only", err);
1176 	}
1177 
1178 	return (err);
1179 }
1180 
1181 int
1182 xen_get_physinfo(xen_sysctl_physinfo_t *pi)
1183 {
1184 	xen_sysctl_t op;
1185 	int ret;
1186 
1187 	bzero(&op, sizeof (op));
1188 	op.cmd = XEN_SYSCTL_physinfo;
1189 	op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
1190 
1191 	ret = HYPERVISOR_sysctl(&op);
1192 
1193 	if (ret != 0)
1194 		return (ret);
1195 
1196 	bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo));
1197 	return (0);
1198 }
1199 
1200 int
1201 xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus)
1202 {
1203 	struct xen_mc_physcpuinfo cpi;
1204 
1205 	cpi.ncpus = *ncpus;
1206 	/*LINTED: constant in conditional context*/
1207 	set_xen_guest_handle(cpi.info, log_cpus);
1208 
1209 	if (HYPERVISOR_mca(XEN_MC_CMD_physcpuinfo, (xen_mc_arg_t *)&cpi) !=
1210 	    XEN_MC_HCALL_SUCCESS)
1211 		return (-1);
1212 
1213 	*ncpus = cpi.ncpus;
1214 	return (0);
1215 }
1216 
/*
 * Emit the string `str' via xen_printf().
 *
 * xen_printf() takes a printf-style format, so `str' is printed through a
 * constant "%s" format rather than being used as the format itself.  That
 * way a message that happens to contain '%' characters is output verbatim
 * instead of being interpreted as conversion specifications with no
 * matching arguments.
 */
void
print_panic(const char *str)
{
	xen_printf("%s", str);
}
1222 
1223 /*
1224  * Interfaces to iterate over real cpu information, but only that info
1225  * which we choose to expose here.  These are of interest to dom0
1226  * only (and the backing hypercall should not work for domu).
1227  */
1228 
1229 xen_mc_lcpu_cookie_t
1230 xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)
1231 {
1232 	xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie;
1233 
1234 	if (!DOMAIN_IS_INITDOMAIN(xen_info))
1235 		return (NULL);
1236 
1237 	if (cookie == NULL)
1238 		return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);
1239 
1240 	if (xcp == xen_phys_cpus + xen_phys_ncpus - 1)
1241 		return (NULL);
1242 	else
1243 		return ((xen_mc_lcpu_cookie_t)++xcp);
1244 }
1245 
1246 #define	COOKIE2XCP(c) ((xen_mc_logical_cpu_t *)(c))
1247 
1248 const char *
1249 xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)
1250 {
1251 	xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);
1252 
1253 	return ((const char *)&xcp->mc_vendorid[0]);
1254 }
1255 
1256 int
1257 xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)
1258 {
1259 	return (COOKIE2XCP(cookie)->mc_family);
1260 }
1261 
1262 int
1263 xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)
1264 {
1265 	return (COOKIE2XCP(cookie)->mc_model);
1266 }
1267 
1268 int
1269 xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)
1270 {
1271 	return (COOKIE2XCP(cookie)->mc_step);
1272 }
1273 
1274 id_t
1275 xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)
1276 {
1277 	return (COOKIE2XCP(cookie)->mc_chipid);
1278 }
1279 
1280 id_t
1281 xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)
1282 {
1283 	return (COOKIE2XCP(cookie)->mc_coreid);
1284 }
1285 
1286 id_t
1287 xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)
1288 {
1289 	return (COOKIE2XCP(cookie)->mc_threadid);
1290 }
1291 
1292 id_t
1293 xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)
1294 {
1295 	return (COOKIE2XCP(cookie)->mc_cpunr);
1296 }
1297 
1298 boolean_t
1299 xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)
1300 {
1301 	return (COOKIE2XCP(cookie)->mc_nthreads > 1);
1302 }
1303 
1304 uint64_t
1305 xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)
1306 {
1307 	xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);
1308 
1309 	/*
1310 	 * Need to #define the indices, or search through the array.
1311 	 */
1312 	return (xcp->mc_msrvalues[0].value);
1313 }
1314 
1315 int
1316 xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
1317     boolean_t uvaddr)
1318 {
1319 	long rc;
1320 
1321 	ASSERT(cmd == GNTTABOP_map_grant_ref);
1322 	rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
1323 
1324 #if !defined(_BOOT)
1325 	/*
1326 	 * XXPV --
1327 	 * The map_grant_ref call suffers a poor design flaw.
1328 	 * It's the only hypervisor interface that creates page table mappings
1329 	 * that doesn't take an entire PTE. Hence we can't create the
1330 	 * mapping with a particular setting of the software PTE bits, NX, etc.
1331 	 *
1332 	 * Until the interface is fixed, we need to minimize the possiblity
1333 	 * of dtrace or kmdb blowing up on a foreign mapping that doesn't
1334 	 * have a correct setting for the soft bits. We'll force them here.
1335 	 */
1336 	if ((rc == 0) && (uvaddr == B_FALSE)) {
1337 		extern void xen_fix_foreign(struct hat *, uint64_t);
1338 		uint_t i;
1339 		for (i = 0; i < count; ++i) {
1340 			if (mapop[i].status == GNTST_okay) {
1341 				xen_fix_foreign(kas.a_hat, mapop[i].host_addr);
1342 			}
1343 		}
1344 	}
1345 #endif
1346 
1347 	return (rc);
1348 }
1349