/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};


static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	(void *) &xpvtap_modldrv,
	NULL
};

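/* opaque soft state handle; one xpvtap_state_t is allocated per instance */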
void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
	int e;

	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (e != 0) {
		return (e);
	}

	e = mod_install(&xpvtap_modlinkage);
	if (e != 0) {
		ddi_soft_state_fini(&xpvtap_statep);
		return (e);
	}

	return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
	int e;

	e = mod_remove(&xpvtap_modlinkage);
	if (e != 0) {
		return (e);
	}

	ddi_soft_state_fini(&xpvtap_statep);

	return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/* Initialize the guest ring */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int instance;
	dev_t dev;
	int e;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->bt_dip;
		e = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		e = DDI_SUCCESS;
		break;

	default:
		e = DDI_FAILURE;
		break;
	}

	return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(*devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/* we should only be opened once */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/*
	 * save the app's address space. We need it for mapping/unmapping
	 * grefs since we will be doing it in a separate kernel thread.
	 */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	instance = getminor(devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * wake the thread so it can clean up, and wait for it to exit so we
	 * can be sure it's not in the middle of processing a request/response.
	 */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	ASSERT(state->bt_thread.ut_exit_done);
	mutex_exit(&state->bt_thread.ut_mutex);

	state->bt_map.um_as = NULL;
	state->bt_map.um_guest_pages = NULL;

	/*
	 * when the ring is brought down, a userland hotplug script is run
	 * which tries to bring the userland app down. We'll wait for a bit
	 * for the user app to exit. Notify the thread waiting that the app
	 * has closed the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	ASSERT(state->bt_open.bo_opened);
	state->bt_open.bo_opened = B_FALSE;
	cv_signal(&state->bt_open.bo_exit_cv);
	mutex_exit(&state->bt_open.bo_mutex);

	return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XPVTAP_IOCTL_RESP_PUSH:
		/*
		 * wake thread, thread handles guest requests and user app
		 * responses.
		 */
		mutex_enter(&state->bt_thread.ut_mutex);
		state->bt_thread.ut_wake = B_TRUE;
		cv_signal(&state->bt_thread.ut_wake_cv);
		mutex_exit(&state->bt_thread.ut_mutex);
		break;

	default:
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	return (0);
}


/*
 * xpvtap_segmap()
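 *    called for the user app's mmap(2) of the device. Two offsets are
 *    supported: offset 0 maps the shared user ring page (routed through
 *    devmap_setup() into xpvtap_devmap()), and offset PAGESIZE maps the
 *    gref buffer, which seg_mf backs so grefs can be faulted in/out.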
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/*
	 * Stash the user base address and size; per-request gref addresses
	 * are computed from these later (via XPVTAP_GREF_REQADDR).
	 */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can cleanup when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}


/*
 * xpvtap_devmap()
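 *    maps the user ring page (allocated in xpvtap_user_ring_init()) into
 *    the app's VA space; only reached for the offset == 0 mapping.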
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}


/*
 * xpvtap_chpoll()
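 *    returns POLLIN | POLLRDNORM when requests have been pushed onto the
 *    user ring since the last poll; otherwise arms the pollhead.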
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		*reventsp = 0;
		return (EINVAL);
	}

	/*
	 * if we have pushed requests on the user ring since the last poll,
	 * wake up the user app
	 */
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here. Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp = POLLIN | POLLRDNORM;

	/* no new requests */
	} else {
		*reventsp = 0;
		if (!anyyet) {
			*phpp = &state->bt_pollhead;
		}
	}

	return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;
	int e;


	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
	if (e != DDI_SUCCESS) {
		return (NULL);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		goto drvinitfail_get_soft_state;
	}

	state->bt_instance = instance;
	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	/* initialize user ring, thread, mapping state */
	e = xpvtap_user_init(state);
	if (e != DDI_SUCCESS) {
		goto drvinitfail_userinit;
	}

	return (state);

drvinitfail_userinit:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state;


	state = (xpvtap_state_t *)arg;

	/* wake thread, thread handles guest requests and user app responses */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
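 *    called on the first request map after the app has mmap(2)ed the gref
 *    buffer; locks down the htables so the HAT can't steal them and
 *    registers each gref page's PTE MA with seg_mf.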
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
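 *    fires when the app unmaps the gref buffer (or its address space goes
 *    away); unmaps any outstanding requests' grefs and unlocks the gref
 *    pages.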
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		/* remove the callback (which is this routine) */
		(void) as_delete_callback(as, arg);
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages correspond with the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding request. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *usring;


	usring = &state->bt_user_ring;

	/* allocate and initialize the page for the shared user ring */
	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &usring->ur_cookie);
	SHARED_RING_INIT(usring->ur_sring);
	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
	usring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];


	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;

	/* create but don't start the user thread */
	(void) sprintf(taskqname, "xpvtap_%d", state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
	ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_user_thread_t *thread;
	xpvtap_state_t *state;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

	/* start the user thread */
	thread->ut_exit_done = B_FALSE;
	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP);
	if (e != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
	/* wake thread so it can exit */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	mutex_exit(&state->bt_thread.ut_mutex);
	ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
	ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
	ddi_taskq_destroy(state->bt_thread.ut_taskq);
	cv_destroy(&state->bt_thread.ut_exit_done_cv);
	cv_destroy(&state->bt_thread.ut_wake_cv);
	mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
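 *    worker thread dispatched on the taskq; woken whenever there is work
 *    to do (guest interrupt, RESP_PUSH ioctl, segmap). Maps guest requests
 *    into the user app's VA and pushes them onto the user ring, then pushes
 *    the user app's responses back onto the guest ring.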
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. user app hasn't
			 * opened the device yet), requeue it and try again
			 * later
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have any,
		 * break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
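 *    allocate a request ID, then map the request's grefs into the user
 *    app's VA space at the gref buffer slot reserved for that ID.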
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct seg *seg;
	struct as *as;
	domid_t domid;
	caddr_t uaddr;
	uint_t flags;
	int i;
	int e;


	domid = xvdi_get_oeid(state->bt_dip);

	as = state->bt_map.um_as;
	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
		return (DDI_FAILURE);
	}

	/* has to happen after segmap returns */
	if (!state->bt_map.um_registered) {
		/* register the pte's with segmf */
		e = xpvtap_segmf_register(state);
		if (e != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* alloc an ID for the user ring */
	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* if we don't have any segments to map, we're done */
	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
	    (req->nr_segments == 0)) {
		return (DDI_SUCCESS);
	}

	/* get the app's gref address */
	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/* if we are reading from disk, we are writing into memory */
	flags = 0;
	if (req->operation == BLKIF_OP_READ) {
		flags |= SEGMF_GREF_WR;
	}

	/* Load the grefs into seg_mf */
	for (i = 0; i < req->nr_segments; i++) {
		gref[i] = req->seg[i].gref;
	}
	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
	    domid);

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
	blkif_request_t *outstanding_req;
	blkif_front_ring_t *uring;
	blkif_request_t *target;
	xpvtap_user_map_t *map;


	uring = &state->bt_user_ring.ur_ring;
	map = &state->bt_map;

	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

	/*
	 * Save the request from the frontend. It is used for ID mapping and
	 * unmap on response/cleanup.
	 */
	outstanding_req = &map->um_outstanding_reqs[uid];
	bcopy(req, outstanding_req, sizeof (*outstanding_req));

	/* put the request on the user ring */
	bcopy(req, target, sizeof (*req));
	target->id = (uint64_t)uid;
	uring->req_prod_pvt++;

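	/* wake up anyone blocked in poll(2) waiting for a request */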
	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

	return (DDI_SUCCESS);
}


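/*
 * xpvtap_user_request_unmap()
 *    unmap the grefs for the request identified by uid, then free up the
 *    user ring ID.
 */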
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}


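/*
 * xpvtap_user_response_get()
 *    pull the next response off the user ring, restoring the guest's
 *    original request ID. Returns B_TRUE if a response was consumed.
 */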
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;


	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out the user app response */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;

	/* restore the guest's id from the original request */
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
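 *    ring-down callback; give the user app a bounded amount of time to
 *    close the driver before giving up on auto-detach.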
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t rc;

	state = (xpvtap_state_t *)arg;

	/*
	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
	 * problem, we just won't auto-detach the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
		    TR_CLOCK_TICK);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver open, "
			    "deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
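 *
 *    A minimal usage sketch (the values here are hypothetical, not taken
 *    from this driver):
 *
 *        xpvtap_rs_hdl_t hdl;
 *        uint_t id;
 *
 *        xpvtap_rs_init(0, 31, &hdl);
 *        if (xpvtap_rs_alloc(hdl, &id) == DDI_SUCCESS)
 *                xpvtap_rs_free(hdl, id);
 *        xpvtap_rs_fini(&hdl);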
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;


	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	/* alloc space for resource structure */
	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * Test to see if the max value is a multiple of 64. If so, we don't
	 * need to allocate an extra 64-bit word. Allocate space for the free
	 * bitmap (8 bytes per uint64_t).
	 */
	if ((max_val & 0x3F) == 0) {
		rstruct->rs_free_size = (max_val >> 6) * 8;
	} else {
		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
	}
	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

	/* Initialize resource structure */
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* Mark all resources as free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	/* setup handle which is returned from this function */
	*handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init(). Notice that a pointer to the
 *    handle is used for the parameter. fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)*handle;

	mutex_destroy(&rstruct->rs_mutex);
	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
	kmem_free(rstruct, sizeof (xpvtap_rs_t));

	/* set handle to null. This helps catch bugs. */
	*handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it finds
	 * a free resource. There are a total of 'max'-'min'+1 resources.
	 * Performs a round robin allocation.
	 */
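	/*
	 * rs_free[] is a bitmap: resource 'r' is tracked by bit (r & 0x3F)
	 * of word (r >> 6); a set bit means the resource is free.
	 */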
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the rollover
			 * at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, lets go to the next one. Note the
		 * rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource. Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);
}