xref: /illumos-gate/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c (revision 66f654faf94d77a6760e083cb715592f4a408046)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/modctl.h>
27 #include <sys/types.h>
28 #include <sys/archsystm.h>
29 #include <sys/machsystm.h>
30 #include <sys/sunndi.h>
31 #include <sys/sunddi.h>
32 #include <sys/ddi_subrdefs.h>
33 #include <sys/xpv_support.h>
34 #include <sys/xen_errno.h>
35 #include <sys/hypervisor.h>
36 #include <sys/gnttab.h>
37 #include <sys/xenbus_comms.h>
38 #include <sys/xenbus_impl.h>
39 #include <xen/sys/xendev.h>
40 #include <sys/sysmacros.h>
41 #include <sys/x86_archext.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/conf.h>
45 #include <sys/devops.h>
46 #include <sys/pc_mmu.h>
47 #include <sys/cmn_err.h>
48 #include <sys/cpr.h>
49 #include <sys/ddi.h>
50 #include <vm/seg_kmem.h>
51 #include <vm/as.h>
52 #include <vm/hat_pte.h>
53 #include <vm/hat_i86.h>
54 
55 #define	XPV_MINOR 0
56 #define	XPV_BUFSIZE 128
57 
58 /* virtual addr for the store_mfn page */
59 caddr_t xb_addr;
60 
61 dev_info_t *xpv_dip;
62 static dev_info_t *xpvd_dip;
63 
/*
 * Suspend/resume tracing.  On DEBUG kernels messages are emitted through
 * xen_printf() when xen_suspend_debug is non-zero; otherwise the macro
 * expands to nothing.
 *
 * The DEBUG form is wrapped in do { } while (0) so that it behaves as a
 * single statement and cannot capture the "else" of an enclosing
 * unbraced "if".
 */
#ifdef DEBUG
int xen_suspend_debug;

#define	SUSPEND_DEBUG(...)					\
	do {							\
		if (xen_suspend_debug)				\
			xen_printf(__VA_ARGS__);		\
	} while (0)
#else
#define	SUSPEND_DEBUG(...)
#endif
71 
72 /*
73  * Forward declarations
74  */
75 static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
76 static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
77 static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
78 static int xpv_open(dev_t *, int, int, cred_t *);
79 static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
80 
81 static struct cb_ops xpv_cb_ops = {
82 	xpv_open,
83 	nulldev,	/* close */
84 	nodev,		/* strategy */
85 	nodev,		/* print */
86 	nodev,		/* dump */
87 	nodev,		/* read */
88 	nodev,		/* write */
89 	xpv_ioctl,	/* ioctl */
90 	nodev,		/* devmap */
91 	nodev,		/* mmap */
92 	nodev,		/* segmap */
93 	nochpoll,	/* poll */
94 	ddi_prop_op,
95 	NULL,
96 	D_MP,
97 	CB_REV,
98 	NULL,
99 	NULL
100 };
101 
102 static struct dev_ops xpv_dv_ops = {
103 	DEVO_REV,
104 	0,
105 	xpv_getinfo,
106 	nulldev,	/* identify */
107 	nulldev,	/* probe */
108 	xpv_attach,
109 	xpv_detach,
110 	nodev,		/* reset */
111 	&xpv_cb_ops,
112 	NULL,		/* struct bus_ops */
113 	NULL,		/* power */
114 	ddi_quiesce_not_supported,	/* devo_quiesce */
115 };
116 
117 static struct modldrv modldrv = {
118 	&mod_driverops,
119 	"xpv driver",
120 	&xpv_dv_ops
121 };
122 
123 static struct modlinkage modl = {
124 	MODREV_1,
125 	{
126 		(void *)&modldrv,
127 		NULL		/* null termination */
128 	}
129 };
130 
131 static ddi_dma_attr_t xpv_dma_attr = {
132 	DMA_ATTR_V0,		/* version of this structure */
133 	0,			/* lowest usable address */
134 	0xffffffffffffffffULL,	/* highest usable address */
135 	0x7fffffff,		/* maximum DMAable byte count */
136 	MMU_PAGESIZE,		/* alignment in bytes */
137 	0x7ff,			/* bitmap of burst sizes */
138 	1,			/* minimum transfer */
139 	0xffffffffU,		/* maximum transfer */
140 	0x7fffffffULL,		/* maximum segment length */
141 	1,			/* maximum number of segments */
142 	1,			/* granularity */
143 	0,			/* flags (reserved) */
144 };
145 
146 static ddi_device_acc_attr_t xpv_accattr = {
147 	DDI_DEVICE_ATTR_V0,
148 	DDI_NEVERSWAP_ACC,
149 	DDI_STRICTORDER_ACC
150 };
151 
152 #define	MAX_ALLOCATIONS 10
153 static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS];
154 static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS];
155 static int xen_alloc_cnt = 0;
156 
157 void *
xen_alloc_pages(pgcnt_t cnt)158 xen_alloc_pages(pgcnt_t cnt)
159 {
160 	size_t len;
161 	int a = xen_alloc_cnt++;
162 	caddr_t addr;
163 
164 	ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS);
165 	if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0,
166 	    &xpv_dma_handle[a]) != DDI_SUCCESS)
167 		return (NULL);
168 
169 	if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt,
170 	    &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
171 	    &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) {
172 		ddi_dma_free_handle(&xpv_dma_handle[a]);
173 		cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices");
174 		return (NULL);
175 	}
176 	return (addr);
177 }
178 
179 /*
180  * This function is invoked twice, first time with reprogram=0 to set up
181  * the xpvd portion of the device tree. The second time it is ignored.
182  */
183 static void
xpv_enumerate(int reprogram)184 xpv_enumerate(int reprogram)
185 {
186 	dev_info_t *dip;
187 
188 	if (reprogram != 0)
189 		return;
190 
191 	ndi_devi_alloc_sleep(ddi_root_node(), "xpvd",
192 	    (pnode_t)DEVI_SID_NODEID, &dip);
193 
194 	(void) ndi_devi_bind_driver(dip, 0);
195 
196 	/*
197 	 * Too early to enumerate split device drivers in domU
198 	 * since we need to create taskq thread during enumeration.
199 	 * So, we only enumerate softdevs and console here.
200 	 */
201 	xendev_enum_all(dip, B_TRUE);
202 }
203 
204 /*
205  * Translate a hypervisor errcode to a Solaris error code.
206  */
207 int
xen_xlate_errcode(int error)208 xen_xlate_errcode(int error)
209 {
210 #define	CASE(num)	case X_##num: error = num; break
211 
212 	switch (-error) {
213 		CASE(EPERM);    CASE(ENOENT);   CASE(ESRCH);
214 		CASE(EINTR);	CASE(EIO);	CASE(ENXIO);
215 		CASE(E2BIG);    CASE(ENOMEM);   CASE(EACCES);
216 		CASE(EFAULT);   CASE(EBUSY);    CASE(EEXIST);
217 		CASE(ENODEV);   CASE(EISDIR);   CASE(EINVAL);
218 		CASE(ENOSPC);   CASE(ESPIPE);   CASE(EROFS);
219 		CASE(ENOSYS);   CASE(ENOTEMPTY); CASE(EISCONN);
220 		CASE(ENODATA);
221 		default:
222 		panic("xen_xlate_errcode: unknown error %d", error);
223 	}
224 	return (error);
225 #undef CASE
226 }
227 
/*
 * printf-style message output, used by SUSPEND_DEBUG.
 *
 * The collected argument list must go to the va_list flavour of printf;
 * passing a va_list to printf() itself would have the conversions in
 * 'fmt' consume the va_list object rather than the caller's arguments.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vprintf(fmt, adx);
	va_end(adx);
}
238 
239 /*
240  * Stub functions to get the FE drivers to build, and to catch drivers that
241  * misbehave in HVM domains.
242  */
243 /*ARGSUSED*/
244 void
xen_release_pfn(pfn_t pfn)245 xen_release_pfn(pfn_t pfn)
246 {
247 	panic("xen_release_pfn() is not supported in HVM domains");
248 }
249 
250 /*ARGSUSED*/
251 void
reassign_pfn(pfn_t pfn,mfn_t mfn)252 reassign_pfn(pfn_t pfn, mfn_t mfn)
253 {
254 	panic("reassign_pfn() is not supported in HVM domains");
255 }
256 
257 /*ARGSUSED*/
258 long
balloon_free_pages(uint_t page_cnt,mfn_t * mfns,caddr_t kva,pfn_t * pfns)259 balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
260 {
261 	panic("balloon_free_pages() is not supported in HVM domains");
262 	return (0);
263 }
264 
265 /*ARGSUSED*/
266 void
balloon_drv_added(int64_t delta)267 balloon_drv_added(int64_t delta)
268 {
269 	panic("balloon_drv_added() is not supported in HVM domains");
270 }
271 
272 /*
273  * Add a mapping for the machine page at the given virtual address.
274  */
275 void
kbm_map_ma(maddr_t ma,uintptr_t va,uint_t level)276 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
277 {
278 	ASSERT(level == 0);
279 
280 	hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
281 	    mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
282 }
283 
284 /*ARGSUSED*/
285 int
xen_map_gref(uint_t cmd,gnttab_map_grant_ref_t * mapop,uint_t count,boolean_t uvaddr)286 xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
287     boolean_t uvaddr)
288 {
289 	long rc;
290 
291 	ASSERT(cmd == GNTTABOP_map_grant_ref);
292 	rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
293 
294 	return (rc);
295 }
296 
297 static struct xenbus_watch shutdown_watch;
298 taskq_t *xen_shutdown_tq;
299 
300 #define	SHUTDOWN_INVALID	-1
301 #define	SHUTDOWN_POWEROFF	0
302 #define	SHUTDOWN_REBOOT		1
303 #define	SHUTDOWN_SUSPEND	2
304 #define	SHUTDOWN_HALT		3
305 #define	SHUTDOWN_MAX		4
306 
307 #define	SHUTDOWN_TIMEOUT_SECS (60 * 5)
308 
309 int
xen_suspend_devices(dev_info_t * dip)310 xen_suspend_devices(dev_info_t *dip)
311 {
312 	int error;
313 	char buf[XPV_BUFSIZE];
314 
315 	SUSPEND_DEBUG("xen_suspend_devices\n");
316 
317 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
318 		if (xen_suspend_devices(ddi_get_child(dip)))
319 			return (ENXIO);
320 		if (ddi_get_driver(dip) == NULL)
321 			continue;
322 		SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf));
323 		ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0);
324 
325 
326 		if (!i_ddi_devi_attached(dip)) {
327 			error = DDI_FAILURE;
328 		} else {
329 			error = devi_detach(dip, DDI_SUSPEND);
330 		}
331 
332 		if (error == DDI_SUCCESS) {
333 			DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
334 		} else {
335 			SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n",
336 			    ddi_deviname(dip, buf));
337 			cmn_err(CE_WARN, "Unable to suspend device %s.",
338 			    ddi_deviname(dip, buf));
339 			cmn_err(CE_WARN, "Device is busy or does not "
340 			    "support suspend/resume.");
341 				return (ENXIO);
342 		}
343 	}
344 	return (0);
345 }
346 
347 int
xen_resume_devices(dev_info_t * start,int resume_failed)348 xen_resume_devices(dev_info_t *start, int resume_failed)
349 {
350 	dev_info_t *dip, *next, *last = NULL;
351 	int did_suspend;
352 	int error = resume_failed;
353 	char buf[XPV_BUFSIZE];
354 
355 	SUSPEND_DEBUG("xen_resume_devices\n");
356 
357 	while (last != start) {
358 		dip = start;
359 		next = ddi_get_next_sibling(dip);
360 		while (next != last) {
361 			dip = next;
362 			next = ddi_get_next_sibling(dip);
363 		}
364 
365 		/*
366 		 * cpr is the only one that uses this field and the device
367 		 * itself hasn't resumed yet, there is no need to use a
368 		 * lock, even though kernel threads are active by now.
369 		 */
370 		did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
371 		if (did_suspend)
372 			DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;
373 
374 		/*
375 		 * There may be background attaches happening on devices
376 		 * that were not originally suspended by cpr, so resume
377 		 * only devices that were suspended by cpr. Also, stop
378 		 * resuming after the first resume failure, but traverse
379 		 * the entire tree to clear the suspend flag.
380 		 */
381 		if (did_suspend && !error) {
382 			SUSPEND_DEBUG("Resuming device %s\n",
383 			    ddi_deviname(dip, buf));
384 			/*
385 			 * If a device suspended by cpr gets detached during
386 			 * the resume process (for example, due to hotplugging)
387 			 * before cpr gets around to issuing it a DDI_RESUME,
388 			 * we'll have problems.
389 			 */
390 			if (!i_ddi_devi_attached(dip)) {
391 				cmn_err(CE_WARN, "Skipping %s, device "
392 				    "not ready for resume",
393 				    ddi_deviname(dip, buf));
394 			} else {
395 				if (devi_attach(dip, DDI_RESUME) !=
396 				    DDI_SUCCESS) {
397 					error = ENXIO;
398 				}
399 			}
400 		}
401 
402 		if (error == ENXIO) {
403 			cmn_err(CE_WARN, "Unable to resume device %s",
404 			    ddi_deviname(dip, buf));
405 		}
406 
407 		error = xen_resume_devices(ddi_get_child(dip), error);
408 		last = dip;
409 	}
410 
411 	return (error);
412 }
413 
414 /*ARGSUSED*/
415 static int
check_xpvd(dev_info_t * dip,void * arg)416 check_xpvd(dev_info_t *dip, void *arg)
417 {
418 	char *name;
419 
420 	name = ddi_node_name(dip);
421 	if (name == NULL || strcmp(name, "xpvd")) {
422 		return (DDI_WALK_CONTINUE);
423 	} else {
424 		xpvd_dip = dip;
425 		return (DDI_WALK_TERMINATE);
426 	}
427 }
428 
429 /*
430  * Top level routine to direct suspend/resume of a domain.
431  */
432 void
xen_suspend_domain(void)433 xen_suspend_domain(void)
434 {
435 	extern void rtcsync(void);
436 	extern void ec_resume(void);
437 	extern kmutex_t ec_lock;
438 	struct xen_add_to_physmap xatp;
439 	ulong_t flags;
440 	int err;
441 
442 	cmn_err(CE_NOTE, "Domain suspending for save/migrate");
443 
444 	SUSPEND_DEBUG("xen_suspend_domain\n");
445 
446 	/*
447 	 * We only want to suspend the PV devices, since the emulated devices
448 	 * are suspended by saving the emulated device state.  The PV devices
449 	 * are all children of the xpvd nexus device.  So we search the
450 	 * device tree for the xpvd node to use as the root of the tree to
451 	 * be suspended.
452 	 */
453 	if (xpvd_dip == NULL)
454 		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);
455 
456 	/*
457 	 * suspend interrupts and devices
458 	 */
459 	if (xpvd_dip != NULL)
460 		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
461 	else
462 		cmn_err(CE_WARN, "No PV devices found to suspend");
463 	SUSPEND_DEBUG("xenbus_suspend\n");
464 	xenbus_suspend();
465 
466 	mutex_enter(&cpu_lock);
467 
468 	/*
469 	 * Suspend on vcpu 0
470 	 */
471 	thread_affinity_set(curthread, 0);
472 	kpreempt_disable();
473 
474 	if (ncpus > 1)
475 		pause_cpus(NULL, NULL);
476 	/*
477 	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
478 	 * any holder would have dropped it to get through pause_cpus().
479 	 */
480 	mutex_enter(&ec_lock);
481 
482 	/*
483 	 * From here on in, we can't take locks.
484 	 */
485 
486 	flags = intr_clear();
487 
488 	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
489 	/*
490 	 * At this point we suspend and sometime later resume.
491 	 * Note that this call may return with an indication of a cancelled
492 	 * for now no matter ehat the return we do a full resume of all
493 	 * suspended drivers, etc.
494 	 */
495 	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);
496 
497 	/*
498 	 * Point HYPERVISOR_shared_info to the proper place.
499 	 */
500 	xatp.domid = DOMID_SELF;
501 	xatp.idx = 0;
502 	xatp.space = XENMAPSPACE_shared_info;
503 	xatp.gpfn = xen_shared_info_frame;
504 	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
505 		panic("Could not set shared_info page. error: %d", err);
506 
507 	SUSPEND_DEBUG("gnttab_resume\n");
508 	gnttab_resume();
509 
510 	SUSPEND_DEBUG("ec_resume\n");
511 	ec_resume();
512 
513 	intr_restore(flags);
514 
515 	if (ncpus > 1)
516 		start_cpus();
517 
518 	mutex_exit(&ec_lock);
519 	mutex_exit(&cpu_lock);
520 
521 	/*
522 	 * Now we can take locks again.
523 	 */
524 
525 	rtcsync();
526 
527 	SUSPEND_DEBUG("xenbus_resume\n");
528 	xenbus_resume();
529 	SUSPEND_DEBUG("xen_resume_devices\n");
530 	if (xpvd_dip != NULL)
531 		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);
532 
533 	thread_affinity_clear(curthread);
534 	kpreempt_enable();
535 
536 	SUSPEND_DEBUG("finished xen_suspend_domain\n");
537 
538 	cmn_err(CE_NOTE, "domain restore/migrate completed");
539 }
540 
541 static void
xen_dirty_shutdown(void * arg)542 xen_dirty_shutdown(void *arg)
543 {
544 	int cmd = (uintptr_t)arg;
545 
546 	cmn_err(CE_WARN, "Externally requested shutdown failed or "
547 	    "timed out.\nShutting down.\n");
548 
549 	switch (cmd) {
550 	case SHUTDOWN_HALT:
551 	case SHUTDOWN_POWEROFF:
552 		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
553 		break;
554 	case SHUTDOWN_REBOOT:
555 		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
556 		break;
557 	}
558 }
559 
560 static void
xen_shutdown(void * arg)561 xen_shutdown(void *arg)
562 {
563 	int cmd = (uintptr_t)arg;
564 	proc_t *initpp;
565 
566 	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
567 
568 	if (cmd == SHUTDOWN_SUSPEND) {
569 		xen_suspend_domain();
570 		return;
571 	}
572 
573 	switch (cmd) {
574 	case SHUTDOWN_POWEROFF:
575 		force_shutdown_method = AD_POWEROFF;
576 		break;
577 	case SHUTDOWN_HALT:
578 		force_shutdown_method = AD_HALT;
579 		break;
580 	case SHUTDOWN_REBOOT:
581 		force_shutdown_method = AD_BOOT;
582 		break;
583 	}
584 
585 
586 	/*
587 	 * If we're still booting and init(1) isn't set up yet, simply halt.
588 	 */
589 	mutex_enter(&pidlock);
590 	initpp = prfind(P_INITPID);
591 	mutex_exit(&pidlock);
592 	if (initpp == NULL) {
593 		extern void halt(char *);
594 		halt("Power off the System");   /* just in case */
595 	}
596 
597 	/*
598 	 * else, graceful shutdown with inittab and all getting involved
599 	 */
600 	psignal(initpp, SIGPWR);
601 
602 	(void) timeout(xen_dirty_shutdown, arg,
603 	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
604 }
605 
606 /*ARGSUSED*/
607 static void
xen_shutdown_handler(struct xenbus_watch * watch,const char ** vec,unsigned int len)608 xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
609 	unsigned int len)
610 {
611 	char *str;
612 	xenbus_transaction_t xbt;
613 	int err, shutdown_code = SHUTDOWN_INVALID;
614 	unsigned int slen;
615 
616 again:
617 	err = xenbus_transaction_start(&xbt);
618 	if (err)
619 		return;
620 	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
621 		(void) xenbus_transaction_end(xbt, 1);
622 		return;
623 	}
624 
625 	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
626 
627 	/*
628 	 * If this is a watch fired from our write below, check out early to
629 	 * avoid an infinite loop.
630 	 */
631 	if (strcmp(str, "") == 0) {
632 		(void) xenbus_transaction_end(xbt, 0);
633 		kmem_free(str, slen);
634 		return;
635 	} else if (strcmp(str, "poweroff") == 0) {
636 		shutdown_code = SHUTDOWN_POWEROFF;
637 	} else if (strcmp(str, "reboot") == 0) {
638 		shutdown_code = SHUTDOWN_REBOOT;
639 	} else if (strcmp(str, "suspend") == 0) {
640 		shutdown_code = SHUTDOWN_SUSPEND;
641 	} else if (strcmp(str, "halt") == 0) {
642 		shutdown_code = SHUTDOWN_HALT;
643 	} else {
644 		printf("Ignoring shutdown request: %s\n", str);
645 	}
646 
647 	(void) xenbus_write(xbt, "control", "shutdown", "");
648 	err = xenbus_transaction_end(xbt, 0);
649 	if (err == EAGAIN) {
650 		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
651 		kmem_free(str, slen);
652 		goto again;
653 	}
654 
655 	kmem_free(str, slen);
656 	if (shutdown_code != SHUTDOWN_INVALID) {
657 		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
658 		    (void *)(intptr_t)shutdown_code, 0);
659 	}
660 }
661 
662 static int
xpv_drv_init(void)663 xpv_drv_init(void)
664 {
665 	if (xpv_feature(XPVF_HYPERCALLS) < 0 ||
666 	    xpv_feature(XPVF_SHARED_INFO) < 0)
667 		return (-1);
668 
669 	/* Set up the grant tables.  */
670 	gnttab_init();
671 
672 	/* Set up event channel support */
673 	if (ec_init() != 0)
674 		return (-1);
675 
676 	/* Set up xenbus */
677 	xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
678 	xs_early_init();
679 	xs_domu_init();
680 
681 	/* Set up for suspend/resume/migrate */
682 	xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
683 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
684 	shutdown_watch.node = "control/shutdown";
685 	shutdown_watch.callback = xen_shutdown_handler;
686 	if (register_xenbus_watch(&shutdown_watch))
687 		cmn_err(CE_WARN, "Failed to set shutdown watcher");
688 
689 	return (0);
690 }
691 
/*
 * Tear down the PV plumbing at detach time.  Only event channel support
 * is shut down here.
 */
static void
xen_pv_fini(void)
{
	ec_fini();
}
697 
698 /*ARGSUSED*/
699 static int
xpv_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)700 xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
701 {
702 	if (getminor((dev_t)arg) != XPV_MINOR)
703 		return (DDI_FAILURE);
704 
705 	switch (cmd) {
706 	case DDI_INFO_DEVT2DEVINFO:
707 		*result = xpv_dip;
708 		break;
709 	case DDI_INFO_DEVT2INSTANCE:
710 		*result = 0;
711 		break;
712 	default:
713 		return (DDI_FAILURE);
714 	}
715 
716 	return (DDI_SUCCESS);
717 }
718 
719 static int
xpv_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)720 xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
721 {
722 	if (cmd != DDI_ATTACH)
723 		return (DDI_FAILURE);
724 
725 	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
726 	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
727 		return (DDI_FAILURE);
728 
729 	xpv_dip = dip;
730 
731 	if (xpv_drv_init() != 0)
732 		return (DDI_FAILURE);
733 
734 	ddi_report_dev(dip);
735 
736 	/*
737 	 * If the memscrubber attempts to scrub the pages we hand to Xen,
738 	 * the domain will panic.
739 	 */
740 	memscrub_disable();
741 
742 	/* Report our version to dom0 */
743 	(void) xenbus_printf(XBT_NULL, "guest/xpv", "version", "%d",
744 	    HVMPV_XPV_VERS);
745 
746 	return (DDI_SUCCESS);
747 }
748 
749 /*
750  * Attempts to reload the PV driver plumbing hang on Intel platforms, so
751  * we don't want to unload the framework by accident.
752  */
753 int xpv_allow_detach = 0;
754 
755 static int
xpv_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)756 xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
757 {
758 	if (cmd != DDI_DETACH || xpv_allow_detach == 0)
759 		return (DDI_FAILURE);
760 
761 	if (xpv_dip != NULL) {
762 		xen_pv_fini();
763 		ddi_remove_minor_node(dip, NULL);
764 		xpv_dip = NULL;
765 	}
766 
767 	return (DDI_SUCCESS);
768 }
769 
770 /*ARGSUSED1*/
771 static int
xpv_open(dev_t * dev,int flag,int otyp,cred_t * cr)772 xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
773 {
774 	return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO);
775 }
776 
777 /*ARGSUSED*/
778 static int
xpv_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cr,int * rval_p)779 xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
780     int *rval_p)
781 {
782 	return (EINVAL);
783 }
784 
785 int
_init(void)786 _init(void)
787 {
788 	int err;
789 
790 	if ((err = mod_install(&modl)) != 0)
791 		return (err);
792 
793 	impl_bus_add_probe(xpv_enumerate);
794 	return (0);
795 }
796 
797 int
_fini(void)798 _fini(void)
799 {
800 	int err;
801 
802 	if ((err = mod_remove(&modl)) != 0)
803 		return (err);
804 
805 	impl_bus_delete_probe(xpv_enumerate);
806 	return (0);
807 }
808 
809 int
_info(struct modinfo * modinfop)810 _info(struct modinfo *modinfop)
811 {
812 	return (mod_info(&modl, modinfop));
813 }
814