/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2017 Joyent, Inc.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};


static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	(void *) &xpvtap_modldrv,
	NULL
};


void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
	int e;

	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (e != 0) {
		return (e);
	}

	e = mod_install(&xpvtap_modlinkage);
	if (e != 0) {
		ddi_soft_state_fini(&xpvtap_statep);
		return (e);
	}

	return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
	int e;

	e = mod_remove(&xpvtap_modlinkage);
	if (e != 0) {
		return (e);
	}

	ddi_soft_state_fini(&xpvtap_statep);

	return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/* Initialize the guest ring */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int instance;
	dev_t dev;
	int e;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->bt_dip;
		e = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		e = DDI_SUCCESS;
		break;

	default:
		e = DDI_FAILURE;
		break;
	}

	return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(*devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/* we should only be opened once */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/*
	 * save the app's address space. We need it for mapping/unmapping
	 * grefs, since we will be doing that in a separate kernel thread.
	 */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	instance = getminor(devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * wake thread so it can cleanup and wait for it to exit so we can
	 * be sure it's not in the middle of processing a request/response.
	 */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	ASSERT(state->bt_thread.ut_exit_done);
	mutex_exit(&state->bt_thread.ut_mutex);

	state->bt_map.um_as = NULL;
	state->bt_map.um_guest_pages = NULL;

	/*
	 * when the ring is brought down, a userland hotplug script is run
	 * which tries to bring the userland app down. We'll wait for a bit
	 * for the user app to exit. Notify the thread waiting that the app
	 * has closed the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	ASSERT(state->bt_open.bo_opened);
	state->bt_open.bo_opened = B_FALSE;
	cv_signal(&state->bt_open.bo_exit_cv);
	mutex_exit(&state->bt_open.bo_mutex);

	return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XPVTAP_IOCTL_RESP_PUSH:
		/*
		 * wake thread, thread handles guest requests and user app
		 * responses.
		 */
		mutex_enter(&state->bt_thread.ut_mutex);
		state->bt_thread.ut_wake = B_TRUE;
		cv_signal(&state->bt_thread.ut_wake_cv);
		mutex_exit(&state->bt_thread.ut_mutex);
		break;

	default:
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	return (0);
}


/*
 * xpvtap_segmap()
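 *    The device exports two mappable regions: offset 0 is the shared user
 *    ring page (handed off to devmap_setup() and ultimately to
 *    xpvtap_devmap()), and offset PAGESIZE is the XPVTAP_GREF_BUFSIZE gref
 *    buffer backed by seg_mf.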
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/*
	 * Stash user base address, and compute address where the request
	 * array will end up.
	 */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can cleanup when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}


/*
 * xpvtap_devmap()
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}


/*
 * xpvtap_chpoll()
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		return (EINVAL);
	}

	/*
	 * if we pushed requests on the user ring since the last poll, wakeup
	 * the user app
	 */
	*reventsp = 0;
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here. Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp = POLLIN | POLLRDNORM;
	}

	if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
		*phpp = &state->bt_pollhead;
	}

	return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;
	int e;


	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
	if (e != DDI_SUCCESS) {
		return (NULL);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		goto drvinitfail_get_soft_state;
	}

	state->bt_instance = instance;
	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	/* initialize user ring, thread, mapping state */
	e = xpvtap_user_init(state);
	if (e != DDI_SUCCESS) {
		goto drvinitfail_userinit;
	}

	return (state);

drvinitfail_userinit:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state;


	state = (xpvtap_state_t *)arg;

	/* wake thread, thread handles guest requests and user app responses */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
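 *    Lock down the PTEs backing the user app's gref buffer and register
 *    their MAs with seg_mf, so that grant references can later be mapped
 *    in and out on a per-request basis.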
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		/* remove the callback (which is this routine) */
		(void) as_delete_callback(as, arg);
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages correspond with the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding request. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *usring;


	usring = &state->bt_user_ring;

	/* allocate and initialize the page for the shared user ring */
	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &usring->ur_cookie);
	SHARED_RING_INIT(usring->ur_sring);
	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
	usring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];


	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;

	/* create but don't start the user thread */
	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
	ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_user_thread_t *thread;
	xpvtap_state_t *state;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

	/* start the user thread */
	thread->ut_exit_done = B_FALSE;
	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP);
	if (e != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
	/* wake thread so it can exit */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	mutex_exit(&state->bt_thread.ut_mutex);
	ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
	ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
	ddi_taskq_destroy(state->bt_thread.ut_taskq);
	cv_destroy(&state->bt_thread.ut_exit_done_cv);
	cv_destroy(&state->bt_thread.ut_wake_cv);
	mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. user app
			 * hasn't opened the device yet), requeue it and try
			 * again later
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have
		 * any, break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
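 *    Map the grefs for a guest request into the user app's VA space. The
 *    ring ID allocated here (returned in 'uid') selects which slot of the
 *    gref buffer the request's segments are mapped into.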
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct seg *seg;
	struct as *as;
	domid_t domid;
	caddr_t uaddr;
	uint_t flags;
	int i;
	int e;


	domid = xvdi_get_oeid(state->bt_dip);

	as = state->bt_map.um_as;
	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
		return (DDI_FAILURE);
	}

	/* has to happen after segmap returns */
	if (!state->bt_map.um_registered) {
		/* register the pte's with segmf */
		e = xpvtap_segmf_register(state);
		if (e != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* alloc an ID for the user ring */
	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* if we don't have any segments to map, we're done */
	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
	    (req->nr_segments == 0)) {
		return (DDI_SUCCESS);
	}

	/* get the app's gref address */
	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/* if we are reading from disk, we are writing into memory */
	flags = 0;
	if (req->operation == BLKIF_OP_READ) {
		flags |= SEGMF_GREF_WR;
	}

	/* Load the grefs into seg_mf */
	for (i = 0; i < req->nr_segments; i++) {
		gref[i] = req->seg[i].gref;
	}
	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
	    domid);

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
	blkif_request_t *outstanding_req;
	blkif_front_ring_t *uring;
	blkif_request_t *target;
	xpvtap_user_map_t *map;


	uring = &state->bt_user_ring.ur_ring;
	map = &state->bt_map;

	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

	/*
	 * Save the request from the frontend; used for ID mapping and unmap
	 * on response/cleanup.
	 */
	outstanding_req = &map->um_outstanding_reqs[uid];
	bcopy(req, outstanding_req, sizeof (*outstanding_req));

	/* put the request on the user ring */
	bcopy(req, target, sizeof (*req));
	target->id = (uint64_t)uid;
	uring->req_prod_pvt++;

	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

	return (DDI_SUCCESS);
}
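

/*
 * xpvtap_user_request_unmap()
 *    Undo xpvtap_user_request_map(): release any grefs mapped for the
 *    original request saved under 'uid', then return the ring ID to the
 *    free pool.
 */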
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}
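

/*
 * xpvtap_user_response_get()
 *    Pull the next response off the user ring (if there is one), recover
 *    the ring ID into 'uid', and restore the guest's original request ID
 *    in the response.
 */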
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;


	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	target = NULL;
	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out the user app response */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;

	/* restore the guest's id from the original request */
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t rc;

	state = (xpvtap_state_t *)arg;

	/*
	 * Give the app 10 secs to exit. If it doesn't exit, it's not a
	 * serious problem, we just won't auto-detach the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
		    TR_CLOCK_TICK);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver "
			    "open, deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;


	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	/* alloc space for resource structure */
	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * Test to see if the max value is 64-bit aligned. If so, we don't
	 * need to allocate an extra 64-bit word. alloc space for free buffer
	 * (8 bytes per uint64_t).
	 */
	if ((max_val & 0x3F) == 0) {
		rstruct->rs_free_size = (max_val >> 6) * 8;
	} else {
		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
	}
	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

	/* Initialize resource structure */
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* Mark all resources as free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	/* setup handle which is returned from this function */
	*handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init(). Notice that a pointer to the
 *    handle is used for the parameter. fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)*handle;

	mutex_destroy(&rstruct->rs_mutex);
	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
	kmem_free(rstruct, sizeof (xpvtap_rs_t));

	/* set handle to null. This helps catch bugs. */
	*handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
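 *
 *    The free map is an array of uint64_t words treated as one long
 *    bitmap: resource 'r' is tracked by bit (r & 0x3F) of rs_free[r >> 6],
 *    so e.g. resource 70 would be bit 6 of rs_free[1].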
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it
	 * finds a free resource. There are a total of 'max'-'min'+1
	 * resources. Performs a round robin allocation.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the
			 * rollover at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, let's go to the next one. Note
		 * the rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource. Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);
}