/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2017 Joyent, Inc.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>

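/*
 * Overview (editorial summary, inferred from the code below): xpvtap is a
 * pseudo device driver which hands blkif requests arriving on a Xen guest
 * ring to a userland app. The app mmap()s a shared user ring (offset 0)
 * and a gref buffer (offset PAGESIZE), poll()s for work, and pushes its
 * responses back down with the XPVTAP_IOCTL_RESP_PUSH ioctl. A taskq
 * thread shuttles requests and responses between the two rings.
 */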

static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};


static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	(void *) &xpvtap_modldrv,
	NULL
};


void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

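/*
 * round-robin ID allocator ("rs" = resource slot) used to hand out user
 * ring request IDs; the implementation lives at the bottom of this file.
 */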
typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
	int e;

	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (e != 0) {
		return (e);
	}

	e = mod_install(&xpvtap_modlinkage);
	if (e != 0) {
		ddi_soft_state_fini(&xpvtap_statep);
		return (e);
	}

	return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
	int e;

	e = mod_remove(&xpvtap_modlinkage);
	if (e != 0) {
		return (e);
	}

	ddi_soft_state_fini(&xpvtap_statep);

	return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/* Initialize the guest ring */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int instance;
	dev_t dev;
	int e;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->bt_dip;
		e = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		e = DDI_SUCCESS;
		break;

	default:
		e = DDI_FAILURE;
		break;
	}

	return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(*devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/* we should only be opened once */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/*
	 * save the app's address space. we need it for mapping/unmapping
	 * grefs since we will be doing it in a separate kernel thread.
	 */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	instance = getminor(devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * wake the thread so it can clean up, and wait for it to exit so we
	 * can be sure it's not in the middle of processing a
	 * request/response.
	 */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	ASSERT(state->bt_thread.ut_exit_done);
	mutex_exit(&state->bt_thread.ut_mutex);

	state->bt_map.um_as = NULL;
	state->bt_map.um_guest_pages = NULL;

	/*
	 * when the ring is brought down, a userland hotplug script is run
	 * which tries to bring the userland app down. We'll wait for a bit
	 * for the user app to exit. Notify the thread waiting that the app
	 * has closed the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	ASSERT(state->bt_open.bo_opened);
	state->bt_open.bo_opened = B_FALSE;
	cv_signal(&state->bt_open.bo_exit_cv);
	mutex_exit(&state->bt_open.bo_mutex);

	return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XPVTAP_IOCTL_RESP_PUSH:
		/*
		 * wake thread, thread handles guest requests and user app
		 * responses.
		 */
		mutex_enter(&state->bt_thread.ut_mutex);
		state->bt_thread.ut_wake = B_TRUE;
		cv_signal(&state->bt_thread.ut_wake_cv);
		mutex_exit(&state->bt_thread.ut_mutex);
		break;

	default:
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	return (0);
}


/*
 * xpvtap_segmap()
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/* stash the user base address and size of the gref area */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can clean up when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}


/*
 * xpvtap_devmap()
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}


/*
 * xpvtap_chpoll()
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		return (EINVAL);
	}

	/*
	 * if we pushed requests on the user ring since the last poll, wake
	 * up the user app
	 */
	*reventsp = 0;
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here. Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp = POLLIN | POLLRDNORM;
	}

	if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
		*phpp = &state->bt_pollhead;
	}

	return (0);
}

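/*
 * A minimal sketch of the userland side implied by the entry points above
 * (editorial, hypothetical pseudo-code; not part of the original source):
 *
 *	fd = open(<xpvtap minor node>, O_RDWR);
 *	ring = mmap(NULL, PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);				(handled by xpvtap_devmap())
 *	grefs = mmap(NULL, XPVTAP_GREF_BUFSIZE, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, PAGESIZE);		(handled by xpvtap_segmap())
 *	for (;;) {
 *		poll(...);			(wakes on POLLIN | POLLRDNORM)
 *		consume requests from ring, do the I/O via grefs, then
 *		ioctl(fd, XPVTAP_IOCTL_RESP_PUSH, 0);
 *	}
 */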

/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;
	int e;


	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
	if (e != DDI_SUCCESS) {
		return (NULL);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		goto drvinitfail_get_soft_state;
	}

	state->bt_instance = instance;
	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	/* initialize user ring, thread, mapping state */
	e = xpvtap_user_init(state);
	if (e != DDI_SUCCESS) {
		goto drvinitfail_userinit;
	}

	return (state);

drvinitfail_userinit:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state;


	state = (xpvtap_state_t *)arg;

	/* wake thread, thread handles guest requests and user app responses */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
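	/*
	 * (editorial note, inferred: the hat_devload() of PFN 0 below appears
	 * to exist only to create a locked PTE for each user VA; seg_mf later
	 * rewrites that PTE's MA with the granted frame, so the dummy mapping
	 * itself is never used.)
	 */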
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		/* remove the callback (which is this routine) */
		(void) as_delete_callback(as, arg);
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages correspond with the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding request. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *usring;


	usring = &state->bt_user_ring;

	/* allocate and initialize the page for the shared user ring */
	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &usring->ur_cookie);
	SHARED_RING_INIT(usring->ur_sring);
	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
	usring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];


	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;

	/* create but don't start the user thread */
	(void) sprintf(taskqname, "xpvtap_%d", state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
	ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_user_thread_t *thread;
	xpvtap_state_t *state;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

	/* start the user thread */
	thread->ut_exit_done = B_FALSE;
	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP);
	if (e != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
	/* wake thread so it can exit */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	mutex_exit(&state->bt_thread.ut_mutex);
	ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
	ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
	ddi_taskq_destroy(state->bt_thread.ut_taskq);
	cv_destroy(&state->bt_thread.ut_exit_done_cv);
	cv_destroy(&state->bt_thread.ut_wake_cv);
	mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. the user app
			 * hasn't opened the device yet), requeue it and try
			 * again later.
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have
		 * any, break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct seg *seg;
	struct as *as;
	domid_t domid;
	caddr_t uaddr;
	uint_t flags;
	int i;
	int e;


	domid = xvdi_get_oeid(state->bt_dip);

	as = state->bt_map.um_as;
	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
		return (DDI_FAILURE);
	}

	/* has to happen after segmap returns */
	if (!state->bt_map.um_registered) {
		/* register the pte's with segmf */
		e = xpvtap_segmf_register(state);
		if (e != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* alloc an ID for the user ring */
	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* if we don't have any segments to map, we're done */
	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
	    (req->nr_segments == 0)) {
		return (DDI_SUCCESS);
	}

	/* get the app's gref address */
	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/* if we are reading from disk, we are writing into memory */
	flags = 0;
	if (req->operation == BLKIF_OP_READ) {
		flags |= SEGMF_GREF_WR;
	}

	/* Load the grefs into seg_mf */
	for (i = 0; i < req->nr_segments; i++) {
		gref[i] = req->seg[i].gref;
	}
	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
	    domid);

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}

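/*
 * (editorial note, inferred: XPVTAP_GREF_REQADDR() appears to give each
 * request ID its own slot of BLKIF_MAX_SEGMENTS_PER_REQUEST pages within
 * the gref buffer, which is why the uid alone is enough to locate the
 * grefs again in xpvtap_user_request_unmap().)
 */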

/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
	blkif_request_t *outstanding_req;
	blkif_front_ring_t *uring;
	blkif_request_t *target;
	xpvtap_user_map_t *map;


	uring = &state->bt_user_ring.ur_ring;
	map = &state->bt_map;

	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

	/*
	 * Save the request from the frontend; it's used for ID mapping and
	 * unmap on response/cleanup.
	 */
	outstanding_req = &map->um_outstanding_reqs[uid];
	bcopy(req, outstanding_req, sizeof (*outstanding_req));

	/* put the request on the user ring */
	bcopy(req, target, sizeof (*req));
	target->id = (uint64_t)uid;
	uring->req_prod_pvt++;

	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

	return (DDI_SUCCESS);
}


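/*
 * xpvtap_user_request_unmap()
 */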
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}


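/*
 * xpvtap_user_response_get()
 */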
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;


	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out the user app response */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;

	/* restore the guest's id from the original request */
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t rc;

	state = (xpvtap_state_t *)arg;

	/*
	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
	 * problem, we just won't auto-detach the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
		    TR_CLOCK_TICK);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver open, "
			    "deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;


	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	/* alloc space for resource structure */
	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * Test to see if the max value is 64-bit aligned. If so, we don't
	 * need to allocate an extra 64-bit word. Allocate space for the free
	 * bitmap (8 bytes per uint64_t).
	 */
	if ((max_val & 0x3F) == 0) {
		rstruct->rs_free_size = (max_val >> 6) * 8;
	} else {
		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
	}
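	/*
	 * (editorial example: for min_val 0 and max_val 31, i.e. a
	 * BLKIF_RING_SIZE of 32, 31 & 0x3F != 0, so rs_free_size is
	 * ((31 >> 6) + 1) * 8 = 8 bytes: one uint64_t tracks all 32 IDs.)
	 */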
	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

	/* Initialize resource structure */
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* Mark all resources as free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	/* setup handle which is returned from this function */
	*handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init(). Notice that a pointer to the
 *    handle is used for the parameter. fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)*handle;

	mutex_destroy(&rstruct->rs_mutex);
	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
	kmem_free(rstruct, sizeof (xpvtap_rs_t));

	/* set handle to null. This helps catch bugs. */
	*handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it
	 * finds a free resource. There are a total of 'max'-'min'+1
	 * resources. Performs a round robin allocation.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the
			 * rollover at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, let's go to the next one. Note
		 * the rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource. Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	/* done flushing; let rs_free() start using the mutex again */
	rstruct->rs_flushing = B_FALSE;

	mutex_exit(&rstruct->rs_mutex);
}