/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};


static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	(void *) &xpvtap_modldrv,
	NULL
};


void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
	int e;

	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (e != 0) {
		return (e);
	}

	e = mod_install(&xpvtap_modlinkage);
	if (e != 0) {
		ddi_soft_state_fini(&xpvtap_statep);
		return (e);
	}

	return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
	int e;

	e = mod_remove(&xpvtap_modlinkage);
	if (e != 0) {
		return (e);
	}

	ddi_soft_state_fini(&xpvtap_statep);

	return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/* Initialize the guest ring */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
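	/*
	 * ar_ringdown runs when the guest ring is torn down;
	 * xpvtap_user_app_stop() gives the user app a chance to close the
	 * driver before we detach.
	 */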
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int instance;
	dev_t dev;
	int e;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->bt_dip;
		e = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		e = DDI_SUCCESS;
		break;

	default:
		e = DDI_FAILURE;
		break;
	}

	return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(*devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/* we should only be opened once */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/*
	 * save the app's address space. We need it for mapping/unmapping
	 * grefs since we will be doing that in a separate kernel thread.
	 */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	instance = getminor(devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * wake the thread so it can clean up, and wait for it to exit so we
	 * can be sure it's not in the middle of processing a request/response.
	 */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	ASSERT(state->bt_thread.ut_exit_done);
	mutex_exit(&state->bt_thread.ut_mutex);

	state->bt_map.um_as = NULL;
	state->bt_map.um_guest_pages = NULL;

	/*
	 * when the ring is brought down, a userland hotplug script is run
	 * which tries to bring the userland app down. We'll wait for a bit
	 * for the user app to exit. Notify the thread waiting that the app
	 * has closed the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	ASSERT(state->bt_open.bo_opened);
	state->bt_open.bo_opened = B_FALSE;
	cv_signal(&state->bt_open.bo_exit_cv);
	mutex_exit(&state->bt_open.bo_mutex);

	return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XPVTAP_IOCTL_RESP_PUSH:
		/*
		 * wake thread, thread handles guest requests and user app
		 * responses.
		 */
		mutex_enter(&state->bt_thread.ut_mutex);
		state->bt_thread.ut_wake = B_TRUE;
		cv_signal(&state->bt_thread.ut_wake_cv);
		mutex_exit(&state->bt_thread.ut_mutex);
		break;

	default:
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	return (0);
}


/*
 * xpvtap_segmap()
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/*
	 * Stash user base address, and compute address where the request
	 * array will end up.
	 */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can cleanup when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}


/*
 * xpvtap_devmap()
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}


/*
 * xpvtap_chpoll()
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		*reventsp = 0;
		return (EINVAL);
	}

	/*
	 * if we pushed requests on the user ring since the last poll, wakeup
	 * the user app
	 */
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here. Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp = POLLIN | POLLRDNORM;

	/* no new requests */
	} else {
		*reventsp = 0;
		if (!anyyet) {
			*phpp = &state->bt_pollhead;
		}
	}

	return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;
	int e;


	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
	if (e != DDI_SUCCESS) {
		return (NULL);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		goto drvinitfail_get_soft_state;
	}

	state->bt_instance = instance;
	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	/* initialize user ring, thread, mapping state */
	e = xpvtap_user_init(state);
	if (e != DDI_SUCCESS) {
		goto drvinitfail_userinit;
	}

	return (state);

drvinitfail_userinit:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state;


	state = (xpvtap_state_t *)arg;

	/* wake thread, thread handles guest requests and user app responses */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as, &as->a_lock);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);

		/* XXX Need to verify if we still need this */
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);

		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as, &as->a_lock);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages corresponds to the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding request. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *usring;


	usring = &state->bt_user_ring;

	/* allocate and initialize the page for the shared user ring */
	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &usring->ur_cookie);
	SHARED_RING_INIT(usring->ur_sring);
	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
	usring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];


	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;

	/* create but don't start the user thread */
	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
	ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_user_thread_t *thread;
	xpvtap_state_t *state;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

	/* start the user thread */
	thread->ut_exit_done = B_FALSE;
	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP);
	if (e != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
	/* wake thread so it can exit */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	mutex_exit(&state->bt_thread.ut_mutex);
	ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
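	/* free the page backing the shared user ring */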
	xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
	ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
	ddi_taskq_destroy(state->bt_thread.ut_taskq);
	cv_destroy(&state->bt_thread.ut_exit_done_cv);
	cv_destroy(&state->bt_thread.ut_wake_cv);
	mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. user app hasn't
			 * opened the device yet), requeue it and try again
			 * later
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have any,
		 * break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct seg *seg;
	struct as *as;
	domid_t domid;
	caddr_t uaddr;
	uint_t flags;
	int i;
	int e;


	domid = xvdi_get_oeid(state->bt_dip);

	as = state->bt_map.um_as;
	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
		return (DDI_FAILURE);
	}

	/* has to happen after segmap returns */
	if (!state->bt_map.um_registered) {
		/* register the pte's with segmf */
		e = xpvtap_segmf_register(state);
		if (e != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* alloc an ID for the user ring */
	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* if we don't have any segments to map, we're done */
	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
	    (req->nr_segments == 0)) {
		return (DDI_SUCCESS);
	}

	/* get the app's gref address */
	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (DDI_FAILURE);
	}

	/* if we are reading from disk, we are writing into memory */
	flags = 0;
	if (req->operation == BLKIF_OP_READ) {
		flags |= SEGMF_GREF_WR;
	}

	/* Load the grefs into seg_mf */
	for (i = 0; i < req->nr_segments; i++) {
		gref[i] = req->seg[i].gref;
	}
	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
	    domid);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
	blkif_request_t *outstanding_req;
	blkif_front_ring_t *uring;
	blkif_request_t *target;
	xpvtap_user_map_t *map;


	uring = &state->bt_user_ring.ur_ring;
	map = &state->bt_map;

	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

	/*
	 * Save the request from the frontend. Used for ID mapping and unmap
	 * on response/cleanup.
	 */
	outstanding_req = &map->um_outstanding_reqs[uid];
	bcopy(req, outstanding_req, sizeof (*outstanding_req));

	/* put the request on the user ring */
	bcopy(req, target, sizeof (*req));
	target->id = (uint64_t)uid;
	uring->req_prod_pvt++;

	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

	return (DDI_SUCCESS);
}


static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as, &as->a_lock);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as, &as->a_lock);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}


static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;


	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	target = NULL;
	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out the user app response */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;

	/* restore the guest's id from the original request */
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t timeout;
	clock_t rc;


	state = (xpvtap_state_t *)arg;

	/*
	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
	 * problem, we just won't auto-detach the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		timeout = ddi_get_lbolt() + drv_usectohz(10000000);
		rc = cv_timedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, timeout);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver open, "
			    "deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;


	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	/* alloc space for resource structure */
	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * Test to see if the max value is 64-bit aligned. If so, we don't need
	 * to allocate an extra 64-bit word. alloc space for free buffer
	 * (8 bytes per uint64_t).
	 */
	if ((max_val & 0x3F) == 0) {
		rstruct->rs_free_size = (max_val >> 6) * 8;
	} else {
		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
	}
	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

	/* Initialize resource structure */
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* Mark all resources as free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	/* setup handle which is returned from this function */
	*handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init(). Notice that a pointer to the
 *    handle is used for the parameter. fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)*handle;

	mutex_destroy(&rstruct->rs_mutex);
	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
	kmem_free(rstruct, sizeof (xpvtap_rs_t));

	/* set handle to null. This helps catch bugs. */
	*handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it finds
	 * a free resource. There are a total of 'max'-'min'+1 resources.
	 * Performs a round robin allocation.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the rollover
			 * at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, let's go to the next one. Note
		 * the rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource. Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);
}