xref: /illumos-gate/usr/src/uts/common/xen/io/xpvtap.c (revision 33efde4275d24731ef87927237b0ffb0630b6b2d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2017 Joyent, Inc.
26  */
27 
28 
29 #include <sys/errno.h>
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/kmem.h>
33 #include <sys/ddi.h>
34 #include <sys/stat.h>
35 #include <sys/sunddi.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/modctl.h>
39 #include <sys/ddi_impldefs.h>
40 #include <sys/sysmacros.h>
41 #include <sys/ddidevmap.h>
42 #include <sys/policy.h>
43 
44 #include <sys/vmsystm.h>
45 #include <vm/hat_i86.h>
46 #include <vm/hat_pte.h>
47 #include <vm/seg_kmem.h>
48 #include <vm/seg_mf.h>
49 
50 #include <xen/io/blkif_impl.h>
51 #include <xen/io/blk_common.h>
52 #include <xen/io/xpvtap.h>
53 
54 
static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

/*
 * Character device entry points.  Userland talks to xpvtap through
 * open/close/ioctl/poll and maps the shared user ring and gref buffer
 * via segmap/devmap; read/write/strategy are not supported (nodev).
 */
static 	struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};
85 
static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

/* autoconfiguration entry points; no bus ops and no power management */
static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* devo_power */
};
104 
105 
static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	(void *) &xpvtap_modldrv,
	NULL
};


/* per-instance soft state anchor, managed with ddi_soft_state_*() */
void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

/*
 * Resource-slot (ID) allocator used for user-ring request IDs.
 * xpvtap_rs_flush() invokes the callback for each outstanding ID
 * (see its use in xpvtap_segmf_unregister()).
 */
typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
158 
159 /*
160  * _init()
161  */
162 int
_init(void)163 _init(void)
164 {
165 	int e;
166 
167 	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
168 	if (e != 0) {
169 		return (e);
170 	}
171 
172 	e = mod_install(&xpvtap_modlinkage);
173 	if (e != 0) {
174 		ddi_soft_state_fini(&xpvtap_statep);
175 		return (e);
176 	}
177 
178 	return (0);
179 }
180 
181 
182 /*
183  * _info()
184  */
185 int
_info(struct modinfo * modinfop)186 _info(struct modinfo *modinfop)
187 {
188 	return (mod_info(&xpvtap_modlinkage, modinfop));
189 }
190 
191 
192 /*
193  * _fini()
194  */
195 int
_fini(void)196 _fini(void)
197 {
198 	int e;
199 
200 	e = mod_remove(&xpvtap_modlinkage);
201 	if (e != 0) {
202 		return (e);
203 	}
204 
205 	ddi_soft_state_fini(&xpvtap_statep);
206 
207 	return (0);
208 }
209 
210 
/*
 * xpvtap_attach()
 *    Bring up one xpvtap instance: allocate driver state, initialize the
 *    shared ring with the guest domain (blk_ring_init()), and create the
 *    minor node userland uses for ioctl/mmap.  Failures unwind in reverse
 *    order via the attachfail_* labels.
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		/* no work is needed on resume */
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/*
	 * Initialize the guest ring.  xpvtap_intr fires when the guest posts
	 * a request; the ringup/ringdown callbacks start the worker thread
	 * and notify it when the frontend goes away.
	 */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}
273 
274 
275 /*
276  * xpvtap_detach()
277  */
278 static int
xpvtap_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)279 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
280 {
281 	xpvtap_state_t *state;
282 	int instance;
283 
284 
285 	instance = ddi_get_instance(dip);
286 	state = ddi_get_soft_state(xpvtap_statep, instance);
287 	if (state == NULL) {
288 		return (DDI_FAILURE);
289 	}
290 
291 	switch (cmd) {
292 	case DDI_DETACH:
293 		break;
294 
295 	case DDI_SUSPEND:
296 	default:
297 		return (DDI_FAILURE);
298 	}
299 
300 	xpvtap_user_thread_stop(state);
301 	blk_ring_fini(&state->bt_guest_ring);
302 	xpvtap_drv_fini(state);
303 	ddi_remove_minor_node(dip, NULL);
304 
305 	return (DDI_SUCCESS);
306 }
307 
308 
309 /*
310  * xpvtap_getinfo()
311  */
312 /*ARGSUSED*/
313 static int
xpvtap_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)314 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
315 {
316 	xpvtap_state_t *state;
317 	int instance;
318 	dev_t dev;
319 	int e;
320 
321 
322 	dev = (dev_t)arg;
323 	instance = getminor(dev);
324 
325 	switch (cmd) {
326 	case DDI_INFO_DEVT2DEVINFO:
327 		state = ddi_get_soft_state(xpvtap_statep, instance);
328 		if (state == NULL) {
329 			return (DDI_FAILURE);
330 		}
331 		*result = (void *)state->bt_dip;
332 		e = DDI_SUCCESS;
333 		break;
334 
335 	case DDI_INFO_DEVT2INSTANCE:
336 		*result = (void *)(uintptr_t)instance;
337 		e = DDI_SUCCESS;
338 		break;
339 
340 	default:
341 		e = DDI_FAILURE;
342 		break;
343 	}
344 
345 	return (e);
346 }
347 
348 
349 /*
350  * xpvtap_open()
351  */
352 /*ARGSUSED*/
353 static int
xpvtap_open(dev_t * devp,int flag,int otyp,cred_t * cred)354 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
355 {
356 	xpvtap_state_t *state;
357 	int instance;
358 
359 
360 	if (secpolicy_xvm_control(cred)) {
361 		return (EPERM);
362 	}
363 
364 	instance = getminor(*devp);
365 	state = ddi_get_soft_state(xpvtap_statep, instance);
366 	if (state == NULL) {
367 		return (ENXIO);
368 	}
369 
370 	/* we should only be opened once */
371 	mutex_enter(&state->bt_open.bo_mutex);
372 	if (state->bt_open.bo_opened) {
373 		mutex_exit(&state->bt_open.bo_mutex);
374 		return (EBUSY);
375 	}
376 	state->bt_open.bo_opened = B_TRUE;
377 	mutex_exit(&state->bt_open.bo_mutex);
378 
379 	/*
380 	 * save the apps address space. need it for mapping/unmapping grefs
381 	 * since will be doing it in a separate kernel thread.
382 	 */
383 	state->bt_map.um_as = curproc->p_as;
384 
385 	return (0);
386 }
387 
388 
389 /*
390  * xpvtap_close()
391  */
392 /*ARGSUSED*/
393 static int
xpvtap_close(dev_t devp,int flag,int otyp,cred_t * cred)394 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
395 {
396 	xpvtap_state_t *state;
397 	int instance;
398 
399 
400 	instance = getminor(devp);
401 	state = ddi_get_soft_state(xpvtap_statep, instance);
402 	if (state == NULL) {
403 		return (ENXIO);
404 	}
405 
406 	/*
407 	 * wake thread so it can cleanup and wait for it to exit so we can
408 	 * be sure it's not in the middle of processing a request/response.
409 	 */
410 	mutex_enter(&state->bt_thread.ut_mutex);
411 	state->bt_thread.ut_wake = B_TRUE;
412 	state->bt_thread.ut_exit = B_TRUE;
413 	cv_signal(&state->bt_thread.ut_wake_cv);
414 	if (!state->bt_thread.ut_exit_done) {
415 		cv_wait(&state->bt_thread.ut_exit_done_cv,
416 		    &state->bt_thread.ut_mutex);
417 	}
418 	ASSERT(state->bt_thread.ut_exit_done);
419 	mutex_exit(&state->bt_thread.ut_mutex);
420 
421 	state->bt_map.um_as = NULL;
422 	state->bt_map.um_guest_pages = NULL;
423 
424 	/*
425 	 * when the ring is brought down, a userland hotplug script is run
426 	 * which tries to bring the userland app down. We'll wait for a bit
427 	 * for the user app to exit. Notify the thread waiting that the app
428 	 * has closed the driver.
429 	 */
430 	mutex_enter(&state->bt_open.bo_mutex);
431 	ASSERT(state->bt_open.bo_opened);
432 	state->bt_open.bo_opened = B_FALSE;
433 	cv_signal(&state->bt_open.bo_exit_cv);
434 	mutex_exit(&state->bt_open.bo_mutex);
435 
436 	return (0);
437 }
438 
439 
440 /*
441  * xpvtap_ioctl()
442  */
443 /*ARGSUSED*/
444 static int
xpvtap_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cred,int * rval)445 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
446     int *rval)
447 {
448 	xpvtap_state_t *state;
449 	int instance;
450 
451 
452 	if (secpolicy_xvm_control(cred)) {
453 		return (EPERM);
454 	}
455 
456 	instance = getminor(dev);
457 	if (instance == -1) {
458 		return (EBADF);
459 	}
460 
461 	state = ddi_get_soft_state(xpvtap_statep, instance);
462 	if (state == NULL) {
463 		return (EBADF);
464 	}
465 
466 	switch (cmd) {
467 	case XPVTAP_IOCTL_RESP_PUSH:
468 		/*
469 		 * wake thread, thread handles guest requests and user app
470 		 * responses.
471 		 */
472 		mutex_enter(&state->bt_thread.ut_mutex);
473 		state->bt_thread.ut_wake = B_TRUE;
474 		cv_signal(&state->bt_thread.ut_wake_cv);
475 		mutex_exit(&state->bt_thread.ut_mutex);
476 		break;
477 
478 	default:
479 		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
480 		return (ENXIO);
481 	}
482 
483 	return (0);
484 }
485 
486 
/*
 * xpvtap_segmap()
 *    Handle the user app's mmap()s.  Offset 0 is the one-page shared user
 *    ring (routed through devmap_setup() into xpvtap_devmap()); offset
 *    PAGESIZE is the gref buffer, which gets a seg_mf-backed segment so
 *    grant references can be faulted into and out of the app's VA space.
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/*
	 * Stash user base address, and compute address where the request
	 * array will end up.
	 */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can cleanup when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		/*
		 * NOTE(review): um_guest_pages/um_guest_size are left
		 * pointing at the range being unmapped here -- confirm
		 * nothing consumes them before a successful re-mmap.
		 */
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}
588 
589 
/*
 * xpvtap_devmap()
 *    Reached via xpvtap_segmap() for offset 0: map the pre-allocated
 *    one-page user ring (umem allocated in xpvtap_user_ring_init())
 *    into the user app's VA space.
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}
636 
637 
/*
 * xpvtap_chpoll()
 *    Poll entry point for the user app.  Reports POLLIN|POLLRDNORM when
 *    new requests have been pushed onto the user ring since the last
 *    poll, and publishes them to the shared ring at that point.
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* only readable events are meaningful for this device */
	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		return (EINVAL);
	}

	/*
	 * if we pushed requests on the user ring since the last poll, wakeup
	 * the user app
	 */
	*reventsp = 0;
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here.  Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp =  POLLIN | POLLRDNORM;
	}

	/* edge-triggered (POLLET) callers always get the pollhead */
	if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
		*phpp = &state->bt_pollhead;
	}

	return (0);
}
689 
690 
691 /*
692  * xpvtap_drv_init()
693  */
694 static xpvtap_state_t *
xpvtap_drv_init(int instance)695 xpvtap_drv_init(int instance)
696 {
697 	xpvtap_state_t *state;
698 	int e;
699 
700 
701 	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
702 	if (e != DDI_SUCCESS) {
703 		return (NULL);
704 	}
705 	state = ddi_get_soft_state(xpvtap_statep, instance);
706 	if (state == NULL) {
707 		goto drvinitfail_get_soft_state;
708 	}
709 
710 	state->bt_instance = instance;
711 	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
712 	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
713 	state->bt_open.bo_opened = B_FALSE;
714 	state->bt_map.um_registered = B_FALSE;
715 
716 	/* initialize user ring, thread, mapping state */
717 	e = xpvtap_user_init(state);
718 	if (e != DDI_SUCCESS) {
719 		goto drvinitfail_userinit;
720 	}
721 
722 	return (state);
723 
724 drvinitfail_userinit:
725 	cv_destroy(&state->bt_open.bo_exit_cv);
726 	mutex_destroy(&state->bt_open.bo_mutex);
727 drvinitfail_get_soft_state:
728 	(void) ddi_soft_state_free(xpvtap_statep, instance);
729 	return (NULL);
730 }
731 
732 
/*
 * xpvtap_drv_fini()
 *    Inverse of xpvtap_drv_init(): tear down the user ring/thread/map
 *    state first, then the open-tracking primitives, then free the
 *    instance soft state.
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}
744 
745 
746 /*
747  * xpvtap_intr()
748  *    this routine will be called when we have a request on the guest ring.
749  */
750 static uint_t
xpvtap_intr(caddr_t arg)751 xpvtap_intr(caddr_t arg)
752 {
753 	xpvtap_state_t *state;
754 
755 
756 	state = (xpvtap_state_t *)arg;
757 
758 	/* wake thread, thread handles guest requests and user app responses */
759 	mutex_enter(&state->bt_thread.ut_mutex);
760 	state->bt_thread.ut_wake = B_TRUE;
761 	cv_signal(&state->bt_thread.ut_wake_cv);
762 	mutex_exit(&state->bt_thread.ut_mutex);
763 
764 	return (DDI_INTR_CLAIMED);
765 }
766 
767 
/*
 * xpvtap_segmf_register()
 *    Register the gref buffer's PTE machine addresses with seg_mf so the
 *    worker thread can map foreign grant pages into the user app's VA.
 *    Must run after segmap has created the segment (see the caller in
 *    xpvtap_user_request_map()).  Returns DDI_SUCCESS/DDI_FAILURE.
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, RW_READER);

	/* the whole gref buffer must fall within a single segment */
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}
820 
821 
/*
 * xpvtap_segmf_unregister()
 *    as_callback routine, run when the user app unmaps the gref buffer
 *    (AS_UNMAP_EVENT).  Unmaps any grefs still held by outstanding
 *    requests, unlocks the gref pages, and removes itself as a callback.
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		/* nothing was registered; just remove the callback */
		(void) as_delete_callback(as, arg);
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}
864 
865 
/*
 * xpvtap_user_init()
 *    Set up everything needed to talk to the user app: the shared user
 *    ring, the request-ID allocator, the outstanding-request copies, and
 *    the worker thread.  Unwinds completely on failure.
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages correspond with the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding requests. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}
920 
921 
922 /*
923  * xpvtap_user_ring_init()
924  */
925 static int
xpvtap_user_ring_init(xpvtap_state_t * state)926 xpvtap_user_ring_init(xpvtap_state_t *state)
927 {
928 	xpvtap_user_ring_t *usring;
929 
930 
931 	usring = &state->bt_user_ring;
932 
933 	/* alocate and initialize the page for the shared user ring */
934 	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
935 	    DDI_UMEM_SLEEP, &usring->ur_cookie);
936 	SHARED_RING_INIT(usring->ur_sring);
937 	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
938 	usring->ur_prod_polled = 0;
939 
940 	return (DDI_SUCCESS);
941 }
942 
943 
944 /*
945  * xpvtap_user_thread_init()
946  */
947 static int
xpvtap_user_thread_init(xpvtap_state_t * state)948 xpvtap_user_thread_init(xpvtap_state_t *state)
949 {
950 	xpvtap_user_thread_t *thread;
951 	char taskqname[32];
952 
953 
954 	thread = &state->bt_thread;
955 
956 	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
957 	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
958 	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
959 	thread->ut_wake = B_FALSE;
960 	thread->ut_exit = B_FALSE;
961 	thread->ut_exit_done = B_TRUE;
962 
963 	/* create but don't start the user thread */
964 	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
965 	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
966 	    TASKQ_DEFAULTPRI, 0);
967 	if (thread->ut_taskq == NULL) {
968 		goto userinitthrfail_taskq_create;
969 	}
970 
971 	return (DDI_SUCCESS);
972 
973 userinitthrfail_taskq_create:
974 	cv_destroy(&thread->ut_exit_done_cv);
975 	cv_destroy(&thread->ut_wake_cv);
976 	mutex_destroy(&thread->ut_mutex);
977 
978 	return (DDI_FAILURE);
979 }
980 
981 
982 /*
983  * xpvtap_user_thread_start()
984  */
985 static void
xpvtap_user_thread_start(caddr_t arg)986 xpvtap_user_thread_start(caddr_t arg)
987 {
988 	xpvtap_user_thread_t *thread;
989 	xpvtap_state_t *state;
990 	int e;
991 
992 
993 	state = (xpvtap_state_t *)arg;
994 	thread = &state->bt_thread;
995 
996 	/* start the user thread */
997 	thread->ut_exit_done = B_FALSE;
998 	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
999 	    DDI_SLEEP);
1000 	if (e != DDI_SUCCESS) {
1001 		thread->ut_exit_done = B_TRUE;
1002 		cmn_err(CE_WARN, "Unable to start user thread\n");
1003 	}
1004 }
1005 
1006 
1007 /*
1008  * xpvtap_user_thread_stop()
1009  */
1010 static void
xpvtap_user_thread_stop(xpvtap_state_t * state)1011 xpvtap_user_thread_stop(xpvtap_state_t *state)
1012 {
1013 	/* wake thread so it can exit */
1014 	mutex_enter(&state->bt_thread.ut_mutex);
1015 	state->bt_thread.ut_wake = B_TRUE;
1016 	state->bt_thread.ut_exit = B_TRUE;
1017 	cv_signal(&state->bt_thread.ut_wake_cv);
1018 	if (!state->bt_thread.ut_exit_done) {
1019 		cv_wait(&state->bt_thread.ut_exit_done_cv,
1020 		    &state->bt_thread.ut_mutex);
1021 	}
1022 	mutex_exit(&state->bt_thread.ut_mutex);
1023 	ASSERT(state->bt_thread.ut_exit_done);
1024 }
1025 
1026 
/*
 * xpvtap_user_fini()
 *    Inverse of xpvtap_user_init(): thread first (so nothing touches the
 *    map or ring), then the ID allocator, the outstanding-request copies,
 *    and finally the shared user ring.
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
}
1044 
1045 
1046 /*
1047  * xpvtap_user_ring_fini()
1048  */
1049 static void
xpvtap_user_ring_fini(xpvtap_state_t * state)1050 xpvtap_user_ring_fini(xpvtap_state_t *state)
1051 {
1052 	ddi_umem_free(state->bt_user_ring.ur_cookie);
1053 }
1054 
1055 
1056 /*
1057  * xpvtap_user_thread_fini()
1058  */
1059 static void
xpvtap_user_thread_fini(xpvtap_state_t * state)1060 xpvtap_user_thread_fini(xpvtap_state_t *state)
1061 {
1062 	ddi_taskq_destroy(state->bt_thread.ut_taskq);
1063 	cv_destroy(&state->bt_thread.ut_exit_done_cv);
1064 	cv_destroy(&state->bt_thread.ut_wake_cv);
1065 	mutex_destroy(&state->bt_thread.ut_mutex);
1066 }
1067 
1068 
1069 /*
1070  * xpvtap_user_thread()
1071  */
1072 static void
xpvtap_user_thread(void * arg)1073 xpvtap_user_thread(void *arg)
1074 {
1075 	xpvtap_user_thread_t *thread;
1076 	blkif_response_t resp;
1077 	xpvtap_state_t *state;
1078 	blkif_request_t req;
1079 	boolean_t b;
1080 	uint_t uid;
1081 	int e;
1082 
1083 
1084 	state = (xpvtap_state_t *)arg;
1085 	thread = &state->bt_thread;
1086 
1087 xpvtap_thread_start:
1088 	/* See if we are supposed to exit */
1089 	mutex_enter(&thread->ut_mutex);
1090 	if (thread->ut_exit) {
1091 		thread->ut_exit_done = B_TRUE;
1092 		cv_signal(&state->bt_thread.ut_exit_done_cv);
1093 		mutex_exit(&thread->ut_mutex);
1094 		return;
1095 	}
1096 
1097 	/*
1098 	 * if we aren't supposed to be awake, wait until someone wakes us.
1099 	 * when we wake up, check for a kill or someone telling us to exit.
1100 	 */
1101 	if (!thread->ut_wake) {
1102 		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
1103 		if ((e == 0) || (thread->ut_exit)) {
1104 			thread->ut_exit = B_TRUE;
1105 			mutex_exit(&thread->ut_mutex);
1106 			goto xpvtap_thread_start;
1107 		}
1108 	}
1109 
1110 	/* if someone didn't wake us, go back to the start of the thread */
1111 	if (!thread->ut_wake) {
1112 		mutex_exit(&thread->ut_mutex);
1113 		goto xpvtap_thread_start;
1114 	}
1115 
1116 	/* we are awake */
1117 	thread->ut_wake = B_FALSE;
1118 	mutex_exit(&thread->ut_mutex);
1119 
1120 	/* process requests from the guest */
1121 	do {
1122 		/*
1123 		 * check for requests from the guest. if we don't have any,
1124 		 * break out of the loop.
1125 		 */
1126 		e = blk_ring_request_get(state->bt_guest_ring, &req);
1127 		if (e == B_FALSE) {
1128 			break;
1129 		}
1130 
1131 		/* we got a request, map the grefs into the user app's VA */
1132 		e = xpvtap_user_request_map(state, &req, &uid);
1133 		if (e != DDI_SUCCESS) {
1134 			/*
1135 			 * If we couldn't map the request (e.g. user app hasn't
1136 			 * opened the device yet), requeue it and try again
1137 			 * later
1138 			 */
1139 			blk_ring_request_requeue(state->bt_guest_ring);
1140 			break;
1141 		}
1142 
1143 		/* push the request to the user app */
1144 		e = xpvtap_user_request_push(state, &req, uid);
1145 		if (e != DDI_SUCCESS) {
1146 			resp.id = req.id;
1147 			resp.operation = req.operation;
1148 			resp.status = BLKIF_RSP_ERROR;
1149 			blk_ring_response_put(state->bt_guest_ring, &resp);
1150 		}
1151 	} while (!thread->ut_exit);
1152 
	/* process responses from the user app */
1154 	do {
1155 		/*
1156 		 * check for responses from the user app. if we don't have any,
1157 		 * break out of the loop.
1158 		 */
1159 		b = xpvtap_user_response_get(state, &resp, &uid);
1160 		if (b != B_TRUE) {
1161 			break;
1162 		}
1163 
1164 		/*
1165 		 * if we got a response, unmap the grefs from the matching
1166 		 * request.
1167 		 */
1168 		xpvtap_user_request_unmap(state, uid);
1169 
1170 		/* push the response to the guest */
1171 		blk_ring_response_put(state->bt_guest_ring, &resp);
1172 	} while (!thread->ut_exit);
1173 
1174 	goto xpvtap_thread_start;
1175 }
1176 
1177 
1178 /*
1179  * xpvtap_user_request_map()
1180  */
1181 static int
xpvtap_user_request_map(xpvtap_state_t * state,blkif_request_t * req,uint_t * uid)1182 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
1183     uint_t *uid)
1184 {
1185 	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1186 	struct seg *seg;
1187 	struct as *as;
1188 	domid_t domid;
1189 	caddr_t uaddr;
1190 	uint_t flags;
1191 	int i;
1192 	int e;
1193 
1194 
1195 	domid = xvdi_get_oeid(state->bt_dip);
1196 
1197 	as = state->bt_map.um_as;
1198 	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
1199 		return (DDI_FAILURE);
1200 	}
1201 
1202 	/* has to happen after segmap returns */
1203 	if (!state->bt_map.um_registered) {
1204 		/* register the pte's with segmf */
1205 		e = xpvtap_segmf_register(state);
1206 		if (e != DDI_SUCCESS) {
1207 			return (DDI_FAILURE);
1208 		}
1209 	}
1210 
1211 	/* alloc an ID for the user ring */
1212 	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
1213 	if (e != DDI_SUCCESS) {
1214 		return (DDI_FAILURE);
1215 	}
1216 
1217 	/* if we don't have any segments to map, we're done */
1218 	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
1219 	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
1220 	    (req->nr_segments == 0)) {
1221 		return (DDI_SUCCESS);
1222 	}
1223 
1224 	/* get the apps gref address */
1225 	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
1226 
1227 	AS_LOCK_ENTER(as, RW_READER);
1228 	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1229 	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1230 	    (seg->s_base + seg->s_size))) {
1231 		AS_LOCK_EXIT(as);
1232 		return (DDI_FAILURE);
1233 	}
1234 
1235 	/* if we are reading from disk, we are writing into memory */
1236 	flags = 0;
1237 	if (req->operation == BLKIF_OP_READ) {
1238 		flags |= SEGMF_GREF_WR;
1239 	}
1240 
1241 	/* Load the grefs into seg_mf */
1242 	for (i = 0; i < req->nr_segments; i++) {
1243 		gref[i] = req->seg[i].gref;
1244 	}
1245 	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
1246 	    domid);
1247 
1248 	AS_LOCK_EXIT(as);
1249 
1250 	return (DDI_SUCCESS);
1251 }
1252 
1253 
1254 /*
1255  * xpvtap_user_request_push()
1256  */
1257 static int
xpvtap_user_request_push(xpvtap_state_t * state,blkif_request_t * req,uint_t uid)1258 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
1259     uint_t uid)
1260 {
1261 	blkif_request_t *outstanding_req;
1262 	blkif_front_ring_t *uring;
1263 	blkif_request_t *target;
1264 	xpvtap_user_map_t *map;
1265 
1266 
1267 	uring = &state->bt_user_ring.ur_ring;
1268 	map = &state->bt_map;
1269 
1270 	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
1271 
1272 	/*
1273 	 * Save request from the frontend. used for ID mapping and unmap
1274 	 * on response/cleanup
1275 	 */
1276 	outstanding_req = &map->um_outstanding_reqs[uid];
1277 	bcopy(req, outstanding_req, sizeof (*outstanding_req));
1278 
1279 	/* put the request on the user ring */
1280 	bcopy(req, target, sizeof (*req));
1281 	target->id = (uint64_t)uid;
1282 	uring->req_prod_pvt++;
1283 
1284 	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
1285 
1286 	return (DDI_SUCCESS);
1287 }
1288 
1289 
1290 static void
xpvtap_user_request_unmap(xpvtap_state_t * state,uint_t uid)1291 xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
1292 {
1293 	blkif_request_t *req;
1294 	struct seg *seg;
1295 	struct as *as;
1296 	caddr_t uaddr;
1297 	int e;
1298 
1299 
1300 	as = state->bt_map.um_as;
1301 	if (as == NULL) {
1302 		return;
1303 	}
1304 
1305 	/* get a copy of the original request */
1306 	req = &state->bt_map.um_outstanding_reqs[uid];
1307 
1308 	/* unmap the grefs for this request */
1309 	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
1310 	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
1311 	    (req->nr_segments != 0)) {
1312 		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
1313 		AS_LOCK_ENTER(as, RW_READER);
1314 		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1315 		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1316 		    (seg->s_base + seg->s_size))) {
1317 			AS_LOCK_EXIT(as);
1318 			xpvtap_rs_free(state->bt_map.um_rs, uid);
1319 			return;
1320 		}
1321 
1322 		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
1323 		if (e != 0) {
1324 			cmn_err(CE_WARN, "unable to release grefs");
1325 		}
1326 
1327 		AS_LOCK_EXIT(as);
1328 	}
1329 
1330 	/* free up the user ring id */
1331 	xpvtap_rs_free(state->bt_map.um_rs, uid);
1332 }
1333 
1334 
1335 static int
xpvtap_user_response_get(xpvtap_state_t * state,blkif_response_t * resp,uint_t * uid)1336 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
1337     uint_t *uid)
1338 {
1339 	blkif_front_ring_t *uring;
1340 	blkif_response_t *target;
1341 
1342 
1343 	uring = &state->bt_user_ring.ur_ring;
1344 
1345 	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
1346 		return (B_FALSE);
1347 	}
1348 
1349 	target = NULL;
1350 	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
1351 	if (target == NULL) {
1352 		return (B_FALSE);
1353 	}
1354 
1355 	/* copy out the user app response */
1356 	bcopy(target, resp, sizeof (*resp));
1357 	uring->rsp_cons++;
1358 
1359 	/* restore the quests id from the original request */
1360 	*uid = (uint_t)resp->id;
1361 	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;
1362 
1363 	return (B_TRUE);
1364 }
1365 
1366 
1367 /*
1368  * xpvtap_user_app_stop()
1369  */
xpvtap_user_app_stop(caddr_t arg)1370 static void xpvtap_user_app_stop(caddr_t arg)
1371 {
1372 	xpvtap_state_t *state;
1373 	clock_t rc;
1374 
1375 	state = (xpvtap_state_t *)arg;
1376 
1377 	/*
1378 	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
1379 	 * problem, we just won't auto-detach the driver.
1380 	 */
1381 	mutex_enter(&state->bt_open.bo_mutex);
1382 	if (state->bt_open.bo_opened) {
1383 		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
1384 		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
1385 		    TR_CLOCK_TICK);
1386 		if (rc <= 0) {
1387 			cmn_err(CE_NOTE, "!user process still has driver open, "
1388 			    "deferring detach\n");
1389 		}
1390 	}
1391 	mutex_exit(&state->bt_open.bo_mutex);
1392 }
1393 
1394 
1395 /*
1396  * xpvtap_rs_init()
1397  *    Initialize the resource structure. init() returns a handle to be used
1398  *    for the rest of the resource functions. This code is written assuming
1399  *    that min_val will be close to 0. Therefore, we will allocate the free
1400  *    buffer only taking max_val into account.
1401  */
1402 static void
xpvtap_rs_init(uint_t min_val,uint_t max_val,xpvtap_rs_hdl_t * handle)1403 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
1404 {
1405 	xpvtap_rs_t *rstruct;
1406 	uint_t array_size;
1407 	uint_t index;
1408 
1409 
1410 	ASSERT(handle != NULL);
1411 	ASSERT(min_val < max_val);
1412 
1413 	/* alloc space for resource structure */
1414 	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);
1415 
1416 	/*
1417 	 * Test to see if the max value is 64-bit aligned. If so, we don't need
1418 	 * to allocate an extra 64-bit word. alloc space for free buffer
1419 	 * (8 bytes per uint64_t).
1420 	 */
1421 	if ((max_val & 0x3F) == 0) {
1422 		rstruct->rs_free_size = (max_val >> 6) * 8;
1423 	} else {
1424 		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
1425 	}
1426 	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
1427 
1428 	/* Initialize resource structure */
1429 	rstruct->rs_min = min_val;
1430 	rstruct->rs_last = min_val;
1431 	rstruct->rs_max = max_val;
1432 	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
1433 	rstruct->rs_flushing = B_FALSE;
1434 
1435 	/* Mark all resources as free */
1436 	array_size = rstruct->rs_free_size >> 3;
1437 	for (index = 0; index < array_size; index++) {
1438 		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
1439 	}
1440 
1441 	/* setup handle which is returned from this function */
1442 	*handle = rstruct;
1443 }
1444 
1445 
1446 /*
1447  * xpvtap_rs_fini()
1448  *    Frees up the space allocated in init().  Notice that a pointer to the
1449  *    handle is used for the parameter.  fini() will set the handle to NULL
1450  *    before returning.
1451  */
1452 static void
xpvtap_rs_fini(xpvtap_rs_hdl_t * handle)1453 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
1454 {
1455 	xpvtap_rs_t *rstruct;
1456 
1457 
1458 	ASSERT(handle != NULL);
1459 
1460 	rstruct = (xpvtap_rs_t *)*handle;
1461 
1462 	mutex_destroy(&rstruct->rs_mutex);
1463 	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
1464 	kmem_free(rstruct, sizeof (xpvtap_rs_t));
1465 
1466 	/* set handle to null.  This helps catch bugs. */
1467 	*handle = NULL;
1468 }
1469 
1470 
1471 /*
1472  * xpvtap_rs_alloc()
1473  *    alloc a resource. If alloc fails, we are out of resources.
1474  */
1475 static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle,uint_t * resource)1476 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
1477 {
1478 	xpvtap_rs_t *rstruct;
1479 	uint_t array_idx;
1480 	uint64_t free;
1481 	uint_t index;
1482 	uint_t last;
1483 	uint_t min;
1484 	uint_t max;
1485 
1486 
1487 	ASSERT(handle != NULL);
1488 	ASSERT(resource != NULL);
1489 
1490 	rstruct = (xpvtap_rs_t *)handle;
1491 
1492 	mutex_enter(&rstruct->rs_mutex);
1493 	min = rstruct->rs_min;
1494 	max = rstruct->rs_max;
1495 
1496 	/*
1497 	 * Find a free resource. This will return out of the loop once it finds
1498 	 * a free resource. There are a total of 'max'-'min'+1 resources.
1499 	 * Performs a round robin allocation.
1500 	 */
1501 	for (index = min; index <= max; index++) {
1502 
1503 		array_idx = rstruct->rs_last >> 6;
1504 		free = rstruct->rs_free[array_idx];
1505 		last = rstruct->rs_last & 0x3F;
1506 
1507 		/* if the next resource to check is free */
1508 		if ((free & ((uint64_t)1 << last)) != 0) {
1509 			/* we are using this resource */
1510 			*resource = rstruct->rs_last;
1511 
1512 			/* take it out of the free list */
1513 			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
1514 
1515 			/*
1516 			 * increment the last count so we start checking the
1517 			 * next resource on the next alloc().  Note the rollover
1518 			 * at 'max'+1.
1519 			 */
1520 			rstruct->rs_last++;
1521 			if (rstruct->rs_last > max) {
1522 				rstruct->rs_last = rstruct->rs_min;
1523 			}
1524 
1525 			/* unlock the resource structure */
1526 			mutex_exit(&rstruct->rs_mutex);
1527 
1528 			return (DDI_SUCCESS);
1529 		}
1530 
1531 		/*
1532 		 * This resource is not free, lets go to the next one. Note the
1533 		 * rollover at 'max'.
1534 		 */
1535 		rstruct->rs_last++;
1536 		if (rstruct->rs_last > max) {
1537 			rstruct->rs_last = rstruct->rs_min;
1538 		}
1539 	}
1540 
1541 	mutex_exit(&rstruct->rs_mutex);
1542 
1543 	return (DDI_FAILURE);
1544 }
1545 
1546 
1547 /*
1548  * xpvtap_rs_free()
1549  *    Free the previously alloc'd resource.  Once a resource has been free'd,
1550  *    it can be used again when alloc is called.
1551  */
1552 static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle,uint_t resource)1553 xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
1554 {
1555 	xpvtap_rs_t *rstruct;
1556 	uint_t array_idx;
1557 	uint_t offset;
1558 
1559 
1560 	ASSERT(handle != NULL);
1561 
1562 	rstruct = (xpvtap_rs_t *)handle;
1563 	ASSERT(resource >= rstruct->rs_min);
1564 	ASSERT(resource <= rstruct->rs_max);
1565 
1566 	if (!rstruct->rs_flushing) {
1567 		mutex_enter(&rstruct->rs_mutex);
1568 	}
1569 
1570 	/* Put the resource back in the free list */
1571 	array_idx = resource >> 6;
1572 	offset = resource & 0x3F;
1573 	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
1574 
1575 	if (!rstruct->rs_flushing) {
1576 		mutex_exit(&rstruct->rs_mutex);
1577 	}
1578 }
1579 
1580 
1581 /*
1582  * xpvtap_rs_flush()
1583  */
1584 static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle,xpvtap_rs_cleanup_t callback,void * arg)1585 xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
1586     void *arg)
1587 {
1588 	xpvtap_rs_t *rstruct;
1589 	uint_t array_idx;
1590 	uint64_t free;
1591 	uint_t index;
1592 	uint_t last;
1593 	uint_t min;
1594 	uint_t max;
1595 
1596 
1597 	ASSERT(handle != NULL);
1598 
1599 	rstruct = (xpvtap_rs_t *)handle;
1600 
1601 	mutex_enter(&rstruct->rs_mutex);
1602 	min = rstruct->rs_min;
1603 	max = rstruct->rs_max;
1604 
1605 	rstruct->rs_flushing = B_TRUE;
1606 
1607 	/*
1608 	 * for all resources not free, call the callback routine to clean it
1609 	 * up.
1610 	 */
1611 	for (index = min; index <= max; index++) {
1612 
1613 		array_idx = rstruct->rs_last >> 6;
1614 		free = rstruct->rs_free[array_idx];
1615 		last = rstruct->rs_last & 0x3F;
1616 
1617 		/* if the next resource to check is not free */
1618 		if ((free & ((uint64_t)1 << last)) == 0) {
1619 			/* call the callback to cleanup */
1620 			(*callback)(arg, rstruct->rs_last);
1621 
1622 			/* put it back in the free list */
1623 			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
1624 		}
1625 
1626 		/* go to the next one. Note the rollover at 'max' */
1627 		rstruct->rs_last++;
1628 		if (rstruct->rs_last > max) {
1629 			rstruct->rs_last = rstruct->rs_min;
1630 		}
1631 	}
1632 
1633 	mutex_exit(&rstruct->rs_mutex);
1634 }
1635