xref: /titanic_50/usr/src/uts/common/xen/io/xpvtap.c (revision 5aa2fb58740c03ee1a7579942aad13294703a008)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #include <sys/errno.h>
29 #include <sys/types.h>
30 #include <sys/conf.h>
31 #include <sys/kmem.h>
32 #include <sys/ddi.h>
33 #include <sys/stat.h>
34 #include <sys/sunddi.h>
35 #include <sys/file.h>
36 #include <sys/open.h>
37 #include <sys/modctl.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/sysmacros.h>
40 #include <sys/ddidevmap.h>
41 #include <sys/policy.h>
42 
43 #include <sys/vmsystm.h>
44 #include <vm/hat_i86.h>
45 #include <vm/hat_pte.h>
46 #include <vm/seg_kmem.h>
47 #include <vm/seg_mf.h>
48 
49 #include <xen/io/blkif_impl.h>
50 #include <xen/io/blk_common.h>
51 #include <xen/io/xpvtap.h>
52 
53 
/*
 * Character device entry points implemented by this driver. They are
 * declared here because the cb_ops table below refers to them before their
 * definitions appear.
 */
static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

/*
 * cb_ops table for the xpvtap minor node. open, close, ioctl, devmap, segmap
 * and chpoll are provided by this driver; the strategy, print, dump, read,
 * write and mmap slots are filled with nodev.
 */
static 	struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};
84 
/*
 * Autoconfiguration entry points, declared ahead of the dev_ops table that
 * references them.
 */
static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

/*
 * dev_ops table. getinfo, attach and detach are implemented by this driver;
 * identify and probe are nulldev, reset is nodev, and there are no bus or
 * power operations.
 */
static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};
103 
104 
/* Module description handed to the module framework; ties in the dev_ops. */
static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

/*
 * Module linkage passed to mod_install(), mod_info() and mod_remove() from
 * _init(), _info() and _fini().
 */
static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	(void *) &xpvtap_modldrv,
	NULL
};
116 
117 
/*
 * Soft state anchor. Initialized in _init() and used with the instance
 * number to look up each instance's xpvtap_state_t.
 */
void *xpvtap_statep;


/* per-instance state setup/teardown and the guest ring interrupt handler */
static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

/*
 * xpvtap_rs_*: helpers for the ID set used to track request IDs handed to
 * the user app. xpvtap_rs_cleanup_t is the per-ID callback type taken by
 * xpvtap_rs_flush().
 */
typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

/* registration of the gref pages with segmf, and the as unmap callback */
static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

/* user ring, request tracking and worker thread setup/teardown */
static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

/* registered with blk_ring_init() as the ring-down callback in attach */
static void xpvtap_user_app_stop(caddr_t arg);

/* per-request work done by the worker thread */
static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
156 
157 
158 /*
159  * _init()
160  */
161 int
162 _init(void)
163 {
164 	int e;
165 
166 	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
167 	if (e != 0) {
168 		return (e);
169 	}
170 
171 	e = mod_install(&xpvtap_modlinkage);
172 	if (e != 0) {
173 		ddi_soft_state_fini(&xpvtap_statep);
174 		return (e);
175 	}
176 
177 	return (0);
178 }
179 
180 
181 /*
182  * _info()
183  */
184 int
185 _info(struct modinfo *modinfop)
186 {
187 	return (mod_info(&xpvtap_modlinkage, modinfop));
188 }
189 
190 
191 /*
192  * _fini()
193  */
194 int
195 _fini(void)
196 {
197 	int e;
198 
199 	e = mod_remove(&xpvtap_modlinkage);
200 	if (e != 0) {
201 		return (e);
202 	}
203 
204 	ddi_soft_state_fini(&xpvtap_statep);
205 
206 	return (0);
207 }
208 
209 
210 /*
211  * xpvtap_attach()
212  */
213 static int
214 xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
215 {
216 	blk_ringinit_args_t args;
217 	xpvtap_state_t *state;
218 	int instance;
219 	int e;
220 
221 
222 	switch (cmd) {
223 	case DDI_ATTACH:
224 		break;
225 
226 	case DDI_RESUME:
227 		return (DDI_SUCCESS);
228 
229 	default:
230 		return (DDI_FAILURE);
231 	}
232 
233 	/* initialize our state info */
234 	instance = ddi_get_instance(dip);
235 	state = xpvtap_drv_init(instance);
236 	if (state == NULL) {
237 		return (DDI_FAILURE);
238 	}
239 	state->bt_dip = dip;
240 
241 	/* Initialize the guest ring */
242 	args.ar_dip = state->bt_dip;
243 	args.ar_intr = xpvtap_intr;
244 	args.ar_intr_arg = (caddr_t)state;
245 	args.ar_ringup = xpvtap_user_thread_start;
246 	args.ar_ringup_arg = (caddr_t)state;
247 	args.ar_ringdown = xpvtap_user_app_stop;
248 	args.ar_ringdown_arg = (caddr_t)state;
249 	e = blk_ring_init(&args, &state->bt_guest_ring);
250 	if (e != DDI_SUCCESS) {
251 		goto attachfail_ringinit;
252 	}
253 
254 	/* create the minor node (for ioctl/mmap) */
255 	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
256 	    DDI_PSEUDO, 0);
257 	if (e != DDI_SUCCESS) {
258 		goto attachfail_minor_node;
259 	}
260 
261 	/* Report that driver was loaded */
262 	ddi_report_dev(dip);
263 
264 	return (DDI_SUCCESS);
265 
266 attachfail_minor_node:
267 	blk_ring_fini(&state->bt_guest_ring);
268 attachfail_ringinit:
269 	xpvtap_drv_fini(state);
270 	return (DDI_FAILURE);
271 }
272 
273 
274 /*
275  * xpvtap_detach()
276  */
277 static int
278 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
279 {
280 	xpvtap_state_t *state;
281 	int instance;
282 
283 
284 	instance = ddi_get_instance(dip);
285 	state = ddi_get_soft_state(xpvtap_statep, instance);
286 	if (state == NULL) {
287 		return (DDI_FAILURE);
288 	}
289 
290 	switch (cmd) {
291 	case DDI_DETACH:
292 		break;
293 
294 	case DDI_SUSPEND:
295 	default:
296 		return (DDI_FAILURE);
297 	}
298 
299 	xpvtap_user_thread_stop(state);
300 	blk_ring_fini(&state->bt_guest_ring);
301 	xpvtap_drv_fini(state);
302 	ddi_remove_minor_node(dip, NULL);
303 
304 	return (DDI_SUCCESS);
305 }
306 
307 
308 /*
309  * xpvtap_getinfo()
310  */
311 /*ARGSUSED*/
312 static int
313 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
314 {
315 	xpvtap_state_t *state;
316 	int instance;
317 	dev_t dev;
318 	int e;
319 
320 
321 	dev = (dev_t)arg;
322 	instance = getminor(dev);
323 
324 	switch (cmd) {
325 	case DDI_INFO_DEVT2DEVINFO:
326 		state = ddi_get_soft_state(xpvtap_statep, instance);
327 		if (state == NULL) {
328 			return (DDI_FAILURE);
329 		}
330 		*result = (void *)state->bt_dip;
331 		e = DDI_SUCCESS;
332 		break;
333 
334 	case DDI_INFO_DEVT2INSTANCE:
335 		*result = (void *)(uintptr_t)instance;
336 		e = DDI_SUCCESS;
337 		break;
338 
339 	default:
340 		e = DDI_FAILURE;
341 		break;
342 	}
343 
344 	return (e);
345 }
346 
347 
348 /*
349  * xpvtap_open()
350  */
351 /*ARGSUSED*/
352 static int
353 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
354 {
355 	xpvtap_state_t *state;
356 	int instance;
357 
358 
359 	if (secpolicy_xvm_control(cred)) {
360 		return (EPERM);
361 	}
362 
363 	instance = getminor(*devp);
364 	state = ddi_get_soft_state(xpvtap_statep, instance);
365 	if (state == NULL) {
366 		return (ENXIO);
367 	}
368 
369 	/* we should only be opened once */
370 	mutex_enter(&state->bt_open.bo_mutex);
371 	if (state->bt_open.bo_opened) {
372 		mutex_exit(&state->bt_open.bo_mutex);
373 		return (EBUSY);
374 	}
375 	state->bt_open.bo_opened = B_TRUE;
376 	mutex_exit(&state->bt_open.bo_mutex);
377 
378 	/*
379 	 * save the apps address space. need it for mapping/unmapping grefs
380 	 * since will be doing it in a separate kernel thread.
381 	 */
382 	state->bt_map.um_as = curproc->p_as;
383 
384 	return (0);
385 }
386 
387 
388 /*
389  * xpvtap_close()
390  */
391 /*ARGSUSED*/
392 static int
393 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
394 {
395 	xpvtap_state_t *state;
396 	int instance;
397 
398 
399 	instance = getminor(devp);
400 	state = ddi_get_soft_state(xpvtap_statep, instance);
401 	if (state == NULL) {
402 		return (ENXIO);
403 	}
404 
405 	/*
406 	 * wake thread so it can cleanup and wait for it to exit so we can
407 	 * be sure it's not in the middle of processing a request/response.
408 	 */
409 	mutex_enter(&state->bt_thread.ut_mutex);
410 	state->bt_thread.ut_wake = B_TRUE;
411 	state->bt_thread.ut_exit = B_TRUE;
412 	cv_signal(&state->bt_thread.ut_wake_cv);
413 	if (!state->bt_thread.ut_exit_done) {
414 		cv_wait(&state->bt_thread.ut_exit_done_cv,
415 		    &state->bt_thread.ut_mutex);
416 	}
417 	ASSERT(state->bt_thread.ut_exit_done);
418 	mutex_exit(&state->bt_thread.ut_mutex);
419 
420 	state->bt_map.um_as = NULL;
421 	state->bt_map.um_guest_pages = NULL;
422 
423 	/*
424 	 * when the ring is brought down, a userland hotplug script is run
425 	 * which tries to bring the userland app down. We'll wait for a bit
426 	 * for the user app to exit. Notify the thread waiting that the app
427 	 * has closed the driver.
428 	 */
429 	mutex_enter(&state->bt_open.bo_mutex);
430 	ASSERT(state->bt_open.bo_opened);
431 	state->bt_open.bo_opened = B_FALSE;
432 	cv_signal(&state->bt_open.bo_exit_cv);
433 	mutex_exit(&state->bt_open.bo_mutex);
434 
435 	return (0);
436 }
437 
438 
439 /*
440  * xpvtap_ioctl()
441  */
442 /*ARGSUSED*/
443 static int
444 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
445     int *rval)
446 {
447 	xpvtap_state_t *state;
448 	int instance;
449 
450 
451 	if (secpolicy_xvm_control(cred)) {
452 		return (EPERM);
453 	}
454 
455 	instance = getminor(dev);
456 	if (instance == -1) {
457 		return (EBADF);
458 	}
459 
460 	state = ddi_get_soft_state(xpvtap_statep, instance);
461 	if (state == NULL) {
462 		return (EBADF);
463 	}
464 
465 	switch (cmd) {
466 	case XPVTAP_IOCTL_RESP_PUSH:
467 		/*
468 		 * wake thread, thread handles guest requests and user app
469 		 * responses.
470 		 */
471 		mutex_enter(&state->bt_thread.ut_mutex);
472 		state->bt_thread.ut_wake = B_TRUE;
473 		cv_signal(&state->bt_thread.ut_wake_cv);
474 		mutex_exit(&state->bt_thread.ut_mutex);
475 		break;
476 
477 	default:
478 		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
479 		return (ENXIO);
480 	}
481 
482 	return (0);
483 }
484 
485 
486 /*
487  * xpvtap_segmap()
488  */
489 /*ARGSUSED*/
490 static int
491 xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
492     off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
493     cred_t *cred_p)
494 {
495 	struct segmf_crargs a;
496 	xpvtap_state_t *state;
497 	int instance;
498 	int e;
499 
500 
501 	if (secpolicy_xvm_control(cred_p)) {
502 		return (EPERM);
503 	}
504 
505 	instance = getminor(dev);
506 	state = ddi_get_soft_state(xpvtap_statep, instance);
507 	if (state == NULL) {
508 		return (EBADF);
509 	}
510 
511 	/* the user app should be doing a MAP_SHARED mapping */
512 	if ((flags & MAP_TYPE) != MAP_SHARED) {
513 		return (EINVAL);
514 	}
515 
516 	/*
517 	 * if this is the user ring (offset = 0), devmap it (which ends up in
518 	 * xpvtap_devmap). devmap will alloc and map the ring into the
519 	 * app's VA space.
520 	 */
521 	if (off == 0) {
522 		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
523 		    prot, maxprot, flags, cred_p);
524 		return (e);
525 	}
526 
527 	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
528 	if (off != PAGESIZE) {
529 		return (EINVAL);
530 	}
531 
532 	/* make sure we get the size we're expecting */
533 	if (len != XPVTAP_GREF_BUFSIZE) {
534 		return (EINVAL);
535 	}
536 
537 	/*
538 	 * reserve user app VA space for the gref pages and use segmf to
539 	 * manage the backing store for the physical memory. segmf will
540 	 * map in/out the grefs and fault them in/out.
541 	 */
542 	ASSERT(asp == state->bt_map.um_as);
543 	as_rangelock(asp);
544 	if ((flags & MAP_FIXED) == 0) {
545 		map_addr(addrp, len, 0, 0, flags);
546 		if (*addrp == NULL) {
547 			as_rangeunlock(asp);
548 			return (ENOMEM);
549 		}
550 	} else {
551 		/* User specified address */
552 		(void) as_unmap(asp, *addrp, len);
553 	}
554 	a.dev = dev;
555 	a.prot = (uchar_t)prot;
556 	a.maxprot = (uchar_t)maxprot;
557 	e = as_map(asp, *addrp, len, segmf_create, &a);
558 	if (e != 0) {
559 		as_rangeunlock(asp);
560 		return (e);
561 	}
562 	as_rangeunlock(asp);
563 
564 	/*
565 	 * Stash user base address, and compute address where the request
566 	 * array will end up.
567 	 */
568 	state->bt_map.um_guest_pages = (caddr_t)*addrp;
569 	state->bt_map.um_guest_size = (size_t)len;
570 
571 	/* register an as callback so we can cleanup when the app goes away */
572 	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
573 	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
574 	if (e != 0) {
575 		(void) as_unmap(asp, *addrp, len);
576 		return (EINVAL);
577 	}
578 
579 	/* wake thread to see if there are requests already queued up */
580 	mutex_enter(&state->bt_thread.ut_mutex);
581 	state->bt_thread.ut_wake = B_TRUE;
582 	cv_signal(&state->bt_thread.ut_wake_cv);
583 	mutex_exit(&state->bt_thread.ut_mutex);
584 
585 	return (0);
586 }
587 
588 
589 /*
590  * xpvtap_devmap()
591  */
592 /*ARGSUSED*/
593 static int
594 xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
595     size_t *maplen, uint_t model)
596 {
597 	xpvtap_user_ring_t *usring;
598 	xpvtap_state_t *state;
599 	int instance;
600 	int e;
601 
602 
603 	instance = getminor(dev);
604 	state = ddi_get_soft_state(xpvtap_statep, instance);
605 	if (state == NULL) {
606 		return (EBADF);
607 	}
608 
609 	/* we should only get here if the offset was == 0 */
610 	if (off != 0) {
611 		return (EINVAL);
612 	}
613 
614 	/* we should only be mapping in one page */
615 	if (len != PAGESIZE) {
616 		return (EINVAL);
617 	}
618 
619 	/*
620 	 * we already allocated the user ring during driver attach, all we
621 	 * need to do is map it into the user app's VA.
622 	 */
623 	usring = &state->bt_user_ring;
624 	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
625 	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
626 	if (e < 0) {
627 		return (e);
628 	}
629 
630 	/* return the size to compete the devmap */
631 	*maplen = PAGESIZE;
632 
633 	return (0);
634 }
635 
636 
637 /*
638  * xpvtap_chpoll()
639  */
640 static int
641 xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
642     struct pollhead **phpp)
643 {
644 	xpvtap_user_ring_t *usring;
645 	xpvtap_state_t *state;
646 	int instance;
647 
648 
649 	instance = getminor(dev);
650 	if (instance == -1) {
651 		return (EBADF);
652 	}
653 	state = ddi_get_soft_state(xpvtap_statep, instance);
654 	if (state == NULL) {
655 		return (EBADF);
656 	}
657 
658 	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
659 		*reventsp = 0;
660 		return (EINVAL);
661 	}
662 
663 	/*
664 	 * if we pushed requests on the user ring since the last poll, wakeup
665 	 * the user app
666 	 */
667 	usring = &state->bt_user_ring;
668 	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {
669 
670 		/*
671 		 * XXX - is this faster here or xpvtap_user_request_push??
672 		 * prelim data says here.  Because less membars or because
673 		 * user thread will spin in poll requests before getting to
674 		 * responses?
675 		 */
676 		RING_PUSH_REQUESTS(&usring->ur_ring);
677 
678 		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
679 		*reventsp =  POLLIN | POLLRDNORM;
680 
681 	/* no new requests */
682 	} else {
683 		*reventsp = 0;
684 		if (!anyyet) {
685 			*phpp = &state->bt_pollhead;
686 		}
687 	}
688 
689 	return (0);
690 }
691 
692 
693 /*
694  * xpvtap_drv_init()
695  */
696 static xpvtap_state_t *
697 xpvtap_drv_init(int instance)
698 {
699 	xpvtap_state_t *state;
700 	int e;
701 
702 
703 	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
704 	if (e != DDI_SUCCESS) {
705 		return (NULL);
706 	}
707 	state = ddi_get_soft_state(xpvtap_statep, instance);
708 	if (state == NULL) {
709 		goto drvinitfail_get_soft_state;
710 	}
711 
712 	state->bt_instance = instance;
713 	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
714 	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
715 	state->bt_open.bo_opened = B_FALSE;
716 	state->bt_map.um_registered = B_FALSE;
717 
718 	/* initialize user ring, thread, mapping state */
719 	e = xpvtap_user_init(state);
720 	if (e != DDI_SUCCESS) {
721 		goto drvinitfail_userinit;
722 	}
723 
724 	return (state);
725 
726 drvinitfail_userinit:
727 	cv_destroy(&state->bt_open.bo_exit_cv);
728 	mutex_destroy(&state->bt_open.bo_mutex);
729 drvinitfail_get_soft_state:
730 	(void) ddi_soft_state_free(xpvtap_statep, instance);
731 	return (NULL);
732 }
733 
734 
735 /*
736  * xpvtap_drv_fini()
737  */
738 static void
739 xpvtap_drv_fini(xpvtap_state_t *state)
740 {
741 	xpvtap_user_fini(state);
742 	cv_destroy(&state->bt_open.bo_exit_cv);
743 	mutex_destroy(&state->bt_open.bo_mutex);
744 	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
745 }
746 
747 
748 /*
749  * xpvtap_intr()
750  *    this routine will be called when we have a request on the guest ring.
751  */
752 static uint_t
753 xpvtap_intr(caddr_t arg)
754 {
755 	xpvtap_state_t *state;
756 
757 
758 	state = (xpvtap_state_t *)arg;
759 
760 	/* wake thread, thread handles guest requests and user app responses */
761 	mutex_enter(&state->bt_thread.ut_mutex);
762 	state->bt_thread.ut_wake = B_TRUE;
763 	cv_signal(&state->bt_thread.ut_wake_cv);
764 	mutex_exit(&state->bt_thread.ut_mutex);
765 
766 	return (DDI_INTR_CLAIMED);
767 }
768 
769 
770 /*
771  * xpvtap_segmf_register()
772  */
773 static int
774 xpvtap_segmf_register(xpvtap_state_t *state)
775 {
776 	struct seg *seg;
777 	uint64_t pte_ma;
778 	struct as *as;
779 	caddr_t uaddr;
780 	uint_t pgcnt;
781 	int i;
782 
783 
784 	as = state->bt_map.um_as;
785 	pgcnt = btopr(state->bt_map.um_guest_size);
786 	uaddr = state->bt_map.um_guest_pages;
787 
788 	if (pgcnt == 0) {
789 		return (DDI_FAILURE);
790 	}
791 
792 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
793 
794 	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
795 	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
796 	    (seg->s_base + seg->s_size))) {
797 		AS_LOCK_EXIT(as, &as->a_lock);
798 		return (DDI_FAILURE);
799 	}
800 
801 	/*
802 	 * lock down the htables so the HAT can't steal them. Register the
803 	 * PTE MA's for each gref page with seg_mf so we can do user space
804 	 * gref mappings.
805 	 */
806 	for (i = 0; i < pgcnt; i++) {
807 		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
808 		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
809 		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
810 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
811 		hat_release_mapping(as->a_hat, uaddr);
812 		segmf_add_gref_pte(seg, uaddr, pte_ma);
813 		uaddr += PAGESIZE;
814 	}
815 
816 	state->bt_map.um_registered = B_TRUE;
817 
818 	AS_LOCK_EXIT(as, &as->a_lock);
819 
820 	return (DDI_SUCCESS);
821 }
822 
823 
824 /*
825  * xpvtap_segmf_unregister()
826  *    as_callback routine
827  */
828 /*ARGSUSED*/
829 static void
830 xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
831 {
832 	xpvtap_state_t *state;
833 	caddr_t uaddr;
834 	uint_t pgcnt;
835 	int i;
836 
837 
838 	state = (xpvtap_state_t *)arg;
839 	if (!state->bt_map.um_registered) {
840 		return;
841 	}
842 
843 	pgcnt = btopr(state->bt_map.um_guest_size);
844 	uaddr = state->bt_map.um_guest_pages;
845 
846 	/* unmap any outstanding req's grefs */
847 	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);
848 
849 	/* Unlock the gref pages */
850 	for (i = 0; i < pgcnt; i++) {
851 		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
852 		hat_prepare_mapping(as->a_hat, uaddr, NULL);
853 		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
854 
855 		/* XXX Need to verify if we still need this */
856 		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
857 		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
858 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
859 		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
860 
861 		hat_release_mapping(as->a_hat, uaddr);
862 		AS_LOCK_EXIT(as, &as->a_lock);
863 		uaddr += PAGESIZE;
864 	}
865 
866 	/* remove the callback (which is this routine) */
867 	(void) as_delete_callback(as, arg);
868 
869 	state->bt_map.um_registered = B_FALSE;
870 }
871 
872 
873 /*
874  * xpvtap_user_init()
875  */
876 static int
877 xpvtap_user_init(xpvtap_state_t *state)
878 {
879 	xpvtap_user_map_t *map;
880 	int e;
881 
882 
883 	map = &state->bt_map;
884 
885 	/* Setup the ring between the driver and user app */
886 	e = xpvtap_user_ring_init(state);
887 	if (e != DDI_SUCCESS) {
888 		return (DDI_FAILURE);
889 	}
890 
891 	/*
892 	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
893 	 * is the same number of requests as the guest ring. Initialize the
894 	 * state we use to track request IDs to the user app. These IDs will
895 	 * also identify which group of gref pages correspond with the
896 	 * request.
897 	 */
898 	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);
899 
900 	/*
901 	 * allocate the space to store a copy of each outstanding requests. We
902 	 * will need to reference the ID and the number of segments when we
903 	 * get the response from the user app.
904 	 */
905 	map->um_outstanding_reqs = kmem_zalloc(
906 	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
907 	    KM_SLEEP);
908 
909 	/*
910 	 * initialize the thread we use to process guest requests and user
911 	 * responses.
912 	 */
913 	e = xpvtap_user_thread_init(state);
914 	if (e != DDI_SUCCESS) {
915 		goto userinitfail_user_thread_init;
916 	}
917 
918 	return (DDI_SUCCESS);
919 
920 userinitfail_user_thread_init:
921 	xpvtap_rs_fini(&map->um_rs);
922 	kmem_free(map->um_outstanding_reqs,
923 	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
924 	xpvtap_user_ring_fini(state);
925 	return (DDI_FAILURE);
926 }
927 
928 
929 /*
930  * xpvtap_user_ring_init()
931  */
932 static int
933 xpvtap_user_ring_init(xpvtap_state_t *state)
934 {
935 	xpvtap_user_ring_t *usring;
936 
937 
938 	usring = &state->bt_user_ring;
939 
940 	/* alocate and initialize the page for the shared user ring */
941 	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
942 	    DDI_UMEM_SLEEP, &usring->ur_cookie);
943 	SHARED_RING_INIT(usring->ur_sring);
944 	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
945 	usring->ur_prod_polled = 0;
946 
947 	return (DDI_SUCCESS);
948 }
949 
950 
951 /*
952  * xpvtap_user_thread_init()
953  */
954 static int
955 xpvtap_user_thread_init(xpvtap_state_t *state)
956 {
957 	xpvtap_user_thread_t *thread;
958 	char taskqname[32];
959 
960 
961 	thread = &state->bt_thread;
962 
963 	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
964 	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
965 	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
966 	thread->ut_wake = B_FALSE;
967 	thread->ut_exit = B_FALSE;
968 	thread->ut_exit_done = B_TRUE;
969 
970 	/* create but don't start the user thread */
971 	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
972 	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
973 	    TASKQ_DEFAULTPRI, 0);
974 	if (thread->ut_taskq == NULL) {
975 		goto userinitthrfail_taskq_create;
976 	}
977 
978 	return (DDI_SUCCESS);
979 
980 userinitthrfail_taskq_dispatch:
981 	ddi_taskq_destroy(thread->ut_taskq);
982 userinitthrfail_taskq_create:
983 	cv_destroy(&thread->ut_exit_done_cv);
984 	cv_destroy(&thread->ut_wake_cv);
985 	mutex_destroy(&thread->ut_mutex);
986 
987 	return (DDI_FAILURE);
988 }
989 
990 
991 /*
992  * xpvtap_user_thread_start()
993  */
994 static void
995 xpvtap_user_thread_start(caddr_t arg)
996 {
997 	xpvtap_user_thread_t *thread;
998 	xpvtap_state_t *state;
999 	int e;
1000 
1001 
1002 	state = (xpvtap_state_t *)arg;
1003 	thread = &state->bt_thread;
1004 
1005 	/* start the user thread */
1006 	thread->ut_exit_done = B_FALSE;
1007 	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
1008 	    DDI_SLEEP);
1009 	if (e != DDI_SUCCESS) {
1010 		thread->ut_exit_done = B_TRUE;
1011 		cmn_err(CE_WARN, "Unable to start user thread\n");
1012 	}
1013 }
1014 
1015 
1016 /*
1017  * xpvtap_user_thread_stop()
1018  */
1019 static void
1020 xpvtap_user_thread_stop(xpvtap_state_t *state)
1021 {
1022 	/* wake thread so it can exit */
1023 	mutex_enter(&state->bt_thread.ut_mutex);
1024 	state->bt_thread.ut_wake = B_TRUE;
1025 	state->bt_thread.ut_exit = B_TRUE;
1026 	cv_signal(&state->bt_thread.ut_wake_cv);
1027 	if (!state->bt_thread.ut_exit_done) {
1028 		cv_wait(&state->bt_thread.ut_exit_done_cv,
1029 		    &state->bt_thread.ut_mutex);
1030 	}
1031 	mutex_exit(&state->bt_thread.ut_mutex);
1032 	ASSERT(state->bt_thread.ut_exit_done);
1033 }
1034 
1035 
1036 /*
1037  * xpvtap_user_fini()
1038  */
1039 static void
1040 xpvtap_user_fini(xpvtap_state_t *state)
1041 {
1042 	xpvtap_user_map_t *map;
1043 
1044 
1045 	map = &state->bt_map;
1046 
1047 	xpvtap_user_thread_fini(state);
1048 	xpvtap_rs_fini(&map->um_rs);
1049 	kmem_free(map->um_outstanding_reqs,
1050 	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
1051 	xpvtap_user_ring_fini(state);
1052 }
1053 
1054 
1055 /*
1056  * xpvtap_user_ring_fini()
1057  */
1058 static void
1059 xpvtap_user_ring_fini(xpvtap_state_t *state)
1060 {
1061 	ddi_umem_free(state->bt_user_ring.ur_cookie);
1062 }
1063 
1064 
1065 /*
1066  * xpvtap_user_thread_fini()
1067  */
1068 static void
1069 xpvtap_user_thread_fini(xpvtap_state_t *state)
1070 {
1071 	ddi_taskq_destroy(state->bt_thread.ut_taskq);
1072 	cv_destroy(&state->bt_thread.ut_exit_done_cv);
1073 	cv_destroy(&state->bt_thread.ut_wake_cv);
1074 	mutex_destroy(&state->bt_thread.ut_mutex);
1075 }
1076 
1077 
1078 /*
1079  * xpvtap_user_thread()
1080  */
1081 static void
1082 xpvtap_user_thread(void *arg)
1083 {
1084 	xpvtap_user_thread_t *thread;
1085 	blkif_response_t resp;
1086 	xpvtap_state_t *state;
1087 	blkif_request_t req;
1088 	boolean_t b;
1089 	uint_t uid;
1090 	int e;
1091 
1092 
1093 	state = (xpvtap_state_t *)arg;
1094 	thread = &state->bt_thread;
1095 
1096 xpvtap_thread_start:
1097 	/* See if we are supposed to exit */
1098 	mutex_enter(&thread->ut_mutex);
1099 	if (thread->ut_exit) {
1100 		thread->ut_exit_done = B_TRUE;
1101 		cv_signal(&state->bt_thread.ut_exit_done_cv);
1102 		mutex_exit(&thread->ut_mutex);
1103 		return;
1104 	}
1105 
1106 	/*
1107 	 * if we aren't supposed to be awake, wait until someone wakes us.
1108 	 * when we wake up, check for a kill or someone telling us to exit.
1109 	 */
1110 	if (!thread->ut_wake) {
1111 		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
1112 		if ((e == 0) || (thread->ut_exit)) {
1113 			thread->ut_exit = B_TRUE;
1114 			mutex_exit(&thread->ut_mutex);
1115 			goto xpvtap_thread_start;
1116 		}
1117 	}
1118 
1119 	/* if someone didn't wake us, go back to the start of the thread */
1120 	if (!thread->ut_wake) {
1121 		mutex_exit(&thread->ut_mutex);
1122 		goto xpvtap_thread_start;
1123 	}
1124 
1125 	/* we are awake */
1126 	thread->ut_wake = B_FALSE;
1127 	mutex_exit(&thread->ut_mutex);
1128 
1129 	/* process requests from the guest */
1130 	do {
1131 		/*
1132 		 * check for requests from the guest. if we don't have any,
1133 		 * break out of the loop.
1134 		 */
1135 		e = blk_ring_request_get(state->bt_guest_ring, &req);
1136 		if (e == B_FALSE) {
1137 			break;
1138 		}
1139 
1140 		/* we got a request, map the grefs into the user app's VA */
1141 		e = xpvtap_user_request_map(state, &req, &uid);
1142 		if (e != DDI_SUCCESS) {
1143 			/*
1144 			 * If we couldn't map the request (e.g. user app hasn't
1145 			 * opened the device yet), requeue it and try again
1146 			 * later
1147 			 */
1148 			blk_ring_request_requeue(state->bt_guest_ring);
1149 			break;
1150 		}
1151 
1152 		/* push the request to the user app */
1153 		e = xpvtap_user_request_push(state, &req, uid);
1154 		if (e != DDI_SUCCESS) {
1155 			resp.id = req.id;
1156 			resp.operation = req.operation;
1157 			resp.status = BLKIF_RSP_ERROR;
1158 			blk_ring_response_put(state->bt_guest_ring, &resp);
1159 		}
1160 	} while (!thread->ut_exit);
1161 
1162 	/* process reponses from the user app */
1163 	do {
1164 		/*
1165 		 * check for responses from the user app. if we don't have any,
1166 		 * break out of the loop.
1167 		 */
1168 		b = xpvtap_user_response_get(state, &resp, &uid);
1169 		if (b != B_TRUE) {
1170 			break;
1171 		}
1172 
1173 		/*
1174 		 * if we got a response, unmap the grefs from the matching
1175 		 * request.
1176 		 */
1177 		xpvtap_user_request_unmap(state, uid);
1178 
1179 		/* push the response to the guest */
1180 		blk_ring_response_put(state->bt_guest_ring, &resp);
1181 	} while (!thread->ut_exit);
1182 
1183 	goto xpvtap_thread_start;
1184 }
1185 
1186 
1187 /*
1188  * xpvtap_user_request_map()
1189  */
1190 static int
1191 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
1192     uint_t *uid)
1193 {
1194 	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1195 	struct seg *seg;
1196 	struct as *as;
1197 	domid_t domid;
1198 	caddr_t uaddr;
1199 	uint_t flags;
1200 	int i;
1201 	int e;
1202 
1203 
1204 	domid = xvdi_get_oeid(state->bt_dip);
1205 
1206 	as = state->bt_map.um_as;
1207 	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
1208 		return (DDI_FAILURE);
1209 	}
1210 
1211 	/* has to happen after segmap returns */
1212 	if (!state->bt_map.um_registered) {
1213 		/* register the pte's with segmf */
1214 		e = xpvtap_segmf_register(state);
1215 		if (e != DDI_SUCCESS) {
1216 			return (DDI_FAILURE);
1217 		}
1218 	}
1219 
1220 	/* alloc an ID for the user ring */
1221 	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
1222 	if (e != DDI_SUCCESS) {
1223 		return (DDI_FAILURE);
1224 	}
1225 
1226 	/* if we don't have any segments to map, we're done */
1227 	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
1228 	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
1229 	    (req->nr_segments == 0)) {
1230 		return (DDI_SUCCESS);
1231 	}
1232 
1233 	/* get the apps gref address */
1234 	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
1235 
1236 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1237 	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1238 	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1239 	    (seg->s_base + seg->s_size))) {
1240 		AS_LOCK_EXIT(as, &as->a_lock);
1241 		return (DDI_FAILURE);
1242 	}
1243 
1244 	/* if we are reading from disk, we are writing into memory */
1245 	flags = 0;
1246 	if (req->operation == BLKIF_OP_READ) {
1247 		flags |= SEGMF_GREF_WR;
1248 	}
1249 
1250 	/* Load the grefs into seg_mf */
1251 	for (i = 0; i < req->nr_segments; i++) {
1252 		gref[i] = req->seg[i].gref;
1253 	}
1254 	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
1255 	    domid);
1256 
1257 	AS_LOCK_EXIT(as, &as->a_lock);
1258 
1259 	return (DDI_SUCCESS);
1260 }
1261 
1262 
1263 /*
1264  * xpvtap_user_request_push()
1265  */
1266 static int
1267 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
1268     uint_t uid)
1269 {
1270 	blkif_request_t *outstanding_req;
1271 	blkif_front_ring_t *uring;
1272 	blkif_request_t *target;
1273 	xpvtap_user_map_t *map;
1274 
1275 
1276 	uring = &state->bt_user_ring.ur_ring;
1277 	map = &state->bt_map;
1278 
1279 	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
1280 
1281 	/*
1282 	 * Save request from the frontend. used for ID mapping and unmap
1283 	 * on response/cleanup
1284 	 */
1285 	outstanding_req = &map->um_outstanding_reqs[uid];
1286 	bcopy(req, outstanding_req, sizeof (*outstanding_req));
1287 
1288 	/* put the request on the user ring */
1289 	bcopy(req, target, sizeof (*req));
1290 	target->id = (uint64_t)uid;
1291 	uring->req_prod_pvt++;
1292 
1293 	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
1294 
1295 	return (DDI_SUCCESS);
1296 }
1297 
1298 
1299 static void
1300 xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
1301 {
1302 	blkif_request_t *req;
1303 	struct seg *seg;
1304 	struct as *as;
1305 	caddr_t uaddr;
1306 	int e;
1307 
1308 
1309 	as = state->bt_map.um_as;
1310 	if (as == NULL) {
1311 		return;
1312 	}
1313 
1314 	/* get a copy of the original request */
1315 	req = &state->bt_map.um_outstanding_reqs[uid];
1316 
1317 	/* unmap the grefs for this request */
1318 	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
1319 	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
1320 	    (req->nr_segments != 0)) {
1321 		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
1322 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1323 		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1324 		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1325 		    (seg->s_base + seg->s_size))) {
1326 			AS_LOCK_EXIT(as, &as->a_lock);
1327 			xpvtap_rs_free(state->bt_map.um_rs, uid);
1328 			return;
1329 		}
1330 
1331 		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
1332 		if (e != 0) {
1333 			cmn_err(CE_WARN, "unable to release grefs");
1334 		}
1335 
1336 		AS_LOCK_EXIT(as, &as->a_lock);
1337 	}
1338 
1339 	/* free up the user ring id */
1340 	xpvtap_rs_free(state->bt_map.um_rs, uid);
1341 }
1342 
1343 
1344 static int
1345 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
1346     uint_t *uid)
1347 {
1348 	blkif_front_ring_t *uring;
1349 	blkif_response_t *target;
1350 
1351 
1352 	uring = &state->bt_user_ring.ur_ring;
1353 
1354 	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
1355 		return (B_FALSE);
1356 	}
1357 
1358 	target = NULL;
1359 	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
1360 	if (target == NULL) {
1361 		return (B_FALSE);
1362 	}
1363 
1364 	/* copy out the user app response */
1365 	bcopy(target, resp, sizeof (*resp));
1366 	uring->rsp_cons++;
1367 
1368 	/* restore the quests id from the original request */
1369 	*uid = (uint_t)resp->id;
1370 	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;
1371 
1372 	return (B_TRUE);
1373 }
1374 
1375 
1376 /*
1377  * xpvtap_user_app_stop()
1378  */
1379 static void xpvtap_user_app_stop(caddr_t arg)
1380 {
1381 	xpvtap_state_t *state;
1382 	clock_t timeout;
1383 	clock_t rc;
1384 
1385 
1386 	state = (xpvtap_state_t *)arg;
1387 
1388 	/*
1389 	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
1390 	 * problem, we just won't auto-detach the driver.
1391 	 */
1392 	mutex_enter(&state->bt_open.bo_mutex);
1393 	if (state->bt_open.bo_opened) {
1394 		timeout = ddi_get_lbolt() + drv_usectohz(10000000);
1395 		rc = cv_timedwait(&state->bt_open.bo_exit_cv,
1396 		    &state->bt_open.bo_mutex, timeout);
1397 		if (rc <= 0) {
1398 			cmn_err(CE_NOTE, "!user process still has driver open, "
1399 			    "deferring detach\n");
1400 		}
1401 	}
1402 	mutex_exit(&state->bt_open.bo_mutex);
1403 }
1404 
1405 
1406 /*
1407  * xpvtap_rs_init()
1408  *    Initialize the resource structure. init() returns a handle to be used
1409  *    for the rest of the resource functions. This code is written assuming
1410  *    that min_val will be close to 0. Therefore, we will allocate the free
1411  *    buffer only taking max_val into account.
1412  */
1413 static void
1414 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
1415 {
1416 	xpvtap_rs_t *rstruct;
1417 	uint_t array_size;
1418 	uint_t index;
1419 
1420 
1421 	ASSERT(handle != NULL);
1422 	ASSERT(min_val < max_val);
1423 
1424 	/* alloc space for resource structure */
1425 	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);
1426 
1427 	/*
1428 	 * Test to see if the max value is 64-bit aligned. If so, we don't need
1429 	 * to allocate an extra 64-bit word. alloc space for free buffer
1430 	 * (8 bytes per uint64_t).
1431 	 */
1432 	if ((max_val & 0x3F) == 0) {
1433 		rstruct->rs_free_size = (max_val >> 6) * 8;
1434 	} else {
1435 		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
1436 	}
1437 	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
1438 
1439 	/* Initialize resource structure */
1440 	rstruct->rs_min = min_val;
1441 	rstruct->rs_last = min_val;
1442 	rstruct->rs_max = max_val;
1443 	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
1444 	rstruct->rs_flushing = B_FALSE;
1445 
1446 	/* Mark all resources as free */
1447 	array_size = rstruct->rs_free_size >> 3;
1448 	for (index = 0; index < array_size; index++) {
1449 		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
1450 	}
1451 
1452 	/* setup handle which is returned from this function */
1453 	*handle = rstruct;
1454 }
1455 
1456 
1457 /*
1458  * xpvtap_rs_fini()
1459  *    Frees up the space allocated in init().  Notice that a pointer to the
1460  *    handle is used for the parameter.  fini() will set the handle to NULL
1461  *    before returning.
1462  */
1463 static void
1464 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
1465 {
1466 	xpvtap_rs_t *rstruct;
1467 
1468 
1469 	ASSERT(handle != NULL);
1470 
1471 	rstruct = (xpvtap_rs_t *)*handle;
1472 
1473 	mutex_destroy(&rstruct->rs_mutex);
1474 	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
1475 	kmem_free(rstruct, sizeof (xpvtap_rs_t));
1476 
1477 	/* set handle to null.  This helps catch bugs. */
1478 	*handle = NULL;
1479 }
1480 
1481 
1482 /*
1483  * xpvtap_rs_alloc()
1484  *    alloc a resource. If alloc fails, we are out of resources.
1485  */
1486 static int
1487 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
1488 {
1489 	xpvtap_rs_t *rstruct;
1490 	uint_t array_idx;
1491 	uint64_t free;
1492 	uint_t index;
1493 	uint_t last;
1494 	uint_t min;
1495 	uint_t max;
1496 
1497 
1498 	ASSERT(handle != NULL);
1499 	ASSERT(resource != NULL);
1500 
1501 	rstruct = (xpvtap_rs_t *)handle;
1502 
1503 	mutex_enter(&rstruct->rs_mutex);
1504 	min = rstruct->rs_min;
1505 	max = rstruct->rs_max;
1506 
1507 	/*
1508 	 * Find a free resource. This will return out of the loop once it finds
1509 	 * a free resource. There are a total of 'max'-'min'+1 resources.
1510 	 * Performs a round robin allocation.
1511 	 */
1512 	for (index = min; index <= max; index++) {
1513 
1514 		array_idx = rstruct->rs_last >> 6;
1515 		free = rstruct->rs_free[array_idx];
1516 		last = rstruct->rs_last & 0x3F;
1517 
1518 		/* if the next resource to check is free */
1519 		if ((free & ((uint64_t)1 << last)) != 0) {
1520 			/* we are using this resource */
1521 			*resource = rstruct->rs_last;
1522 
1523 			/* take it out of the free list */
1524 			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
1525 
1526 			/*
1527 			 * increment the last count so we start checking the
1528 			 * next resource on the next alloc().  Note the rollover
1529 			 * at 'max'+1.
1530 			 */
1531 			rstruct->rs_last++;
1532 			if (rstruct->rs_last > max) {
1533 				rstruct->rs_last = rstruct->rs_min;
1534 			}
1535 
1536 			/* unlock the resource structure */
1537 			mutex_exit(&rstruct->rs_mutex);
1538 
1539 			return (DDI_SUCCESS);
1540 		}
1541 
1542 		/*
1543 		 * This resource is not free, lets go to the next one. Note the
1544 		 * rollover at 'max'.
1545 		 */
1546 		rstruct->rs_last++;
1547 		if (rstruct->rs_last > max) {
1548 			rstruct->rs_last = rstruct->rs_min;
1549 		}
1550 	}
1551 
1552 	mutex_exit(&rstruct->rs_mutex);
1553 
1554 	return (DDI_FAILURE);
1555 }
1556 
1557 
1558 /*
1559  * xpvtap_rs_free()
1560  *    Free the previously alloc'd resource.  Once a resource has been free'd,
1561  *    it can be used again when alloc is called.
1562  */
1563 static void
1564 xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
1565 {
1566 	xpvtap_rs_t *rstruct;
1567 	uint_t array_idx;
1568 	uint_t offset;
1569 
1570 
1571 	ASSERT(handle != NULL);
1572 
1573 	rstruct = (xpvtap_rs_t *)handle;
1574 	ASSERT(resource >= rstruct->rs_min);
1575 	ASSERT(resource <= rstruct->rs_max);
1576 
1577 	if (!rstruct->rs_flushing) {
1578 		mutex_enter(&rstruct->rs_mutex);
1579 	}
1580 
1581 	/* Put the resource back in the free list */
1582 	array_idx = resource >> 6;
1583 	offset = resource & 0x3F;
1584 	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
1585 
1586 	if (!rstruct->rs_flushing) {
1587 		mutex_exit(&rstruct->rs_mutex);
1588 	}
1589 }
1590 
1591 
1592 /*
1593  * xpvtap_rs_flush()
1594  */
1595 static void
1596 xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
1597     void *arg)
1598 {
1599 	xpvtap_rs_t *rstruct;
1600 	uint_t array_idx;
1601 	uint64_t free;
1602 	uint_t index;
1603 	uint_t last;
1604 	uint_t min;
1605 	uint_t max;
1606 
1607 
1608 	ASSERT(handle != NULL);
1609 
1610 	rstruct = (xpvtap_rs_t *)handle;
1611 
1612 	mutex_enter(&rstruct->rs_mutex);
1613 	min = rstruct->rs_min;
1614 	max = rstruct->rs_max;
1615 
1616 	rstruct->rs_flushing = B_TRUE;
1617 
1618 	/*
1619 	 * for all resources not free, call the callback routine to clean it
1620 	 * up.
1621 	 */
1622 	for (index = min; index <= max; index++) {
1623 
1624 		array_idx = rstruct->rs_last >> 6;
1625 		free = rstruct->rs_free[array_idx];
1626 		last = rstruct->rs_last & 0x3F;
1627 
1628 		/* if the next resource to check is not free */
1629 		if ((free & ((uint64_t)1 << last)) == 0) {
1630 			/* call the callback to cleanup */
1631 			(*callback)(arg, rstruct->rs_last);
1632 
1633 			/* put it back in the free list */
1634 			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
1635 		}
1636 
1637 		/* go to the next one. Note the rollover at 'max' */
1638 		rstruct->rs_last++;
1639 		if (rstruct->rs_last > max) {
1640 			rstruct->rs_last = rstruct->rs_min;
1641 		}
1642 	}
1643 
1644 	mutex_exit(&rstruct->rs_mutex);
1645 }
1646