1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2017 Joyent, Inc. 26 */ 27 28 29 #include <sys/errno.h> 30 #include <sys/types.h> 31 #include <sys/conf.h> 32 #include <sys/kmem.h> 33 #include <sys/ddi.h> 34 #include <sys/stat.h> 35 #include <sys/sunddi.h> 36 #include <sys/file.h> 37 #include <sys/open.h> 38 #include <sys/modctl.h> 39 #include <sys/ddi_impldefs.h> 40 #include <sys/sysmacros.h> 41 #include <sys/ddidevmap.h> 42 #include <sys/policy.h> 43 44 #include <sys/vmsystm.h> 45 #include <vm/hat_i86.h> 46 #include <vm/hat_pte.h> 47 #include <vm/seg_kmem.h> 48 #include <vm/seg_mf.h> 49 50 #include <xen/io/blkif_impl.h> 51 #include <xen/io/blk_common.h> 52 #include <xen/io/xpvtap.h> 53 54 55 static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 56 static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred); 57 static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 58 cred_t *cred, int *rval); 59 static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, 60 size_t len, size_t *maplen, uint_t model); 61 static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, 62 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags, 63 cred_t *cred_p); 64 static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 65 struct pollhead **phpp); 66 67 static struct cb_ops xpvtap_cb_ops = { 68 xpvtap_open, /* cb_open */ 69 xpvtap_close, /* cb_close */ 70 nodev, /* cb_strategy */ 71 nodev, /* cb_print */ 72 nodev, /* cb_dump */ 73 nodev, /* cb_read */ 74 nodev, /* cb_write */ 75 xpvtap_ioctl, /* cb_ioctl */ 76 xpvtap_devmap, /* cb_devmap */ 77 nodev, /* cb_mmap */ 78 xpvtap_segmap, /* cb_segmap */ 79 xpvtap_chpoll, /* cb_chpoll */ 80 ddi_prop_op, /* cb_prop_op */ 81 NULL, /* cb_stream */ 82 D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */ 83 CB_REV 84 }; 85 86 static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, 87 void **result); 88 static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd); 89 static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd); 90 91 static struct dev_ops xpvtap_dev_ops = { 92 DEVO_REV, /* devo_rev */ 93 0, /* devo_refcnt */ 94 xpvtap_getinfo, /* devo_getinfo */ 95 nulldev, /* devo_identify */ 96 nulldev, /* devo_probe */ 97 xpvtap_attach, /* devo_attach */ 98 xpvtap_detach, /* devo_detach */ 99 nodev, /* devo_reset */ 100 &xpvtap_cb_ops, /* devo_cb_ops */ 101 NULL, /* devo_bus_ops */ 102 NULL /* power */ 103 }; 104 105 106 static struct modldrv xpvtap_modldrv = { 107 &mod_driverops, /* Type of module. This one is a driver */ 108 "xpvtap driver", /* Name of the module. */ 109 &xpvtap_dev_ops, /* driver ops */ 110 }; 111 112 static struct modlinkage xpvtap_modlinkage = { 113 MODREV_1, 114 (void *) &xpvtap_modldrv, 115 NULL 116 }; 117 118 119 void *xpvtap_statep; 120 121 122 static xpvtap_state_t *xpvtap_drv_init(int instance); 123 static void xpvtap_drv_fini(xpvtap_state_t *state); 124 static uint_t xpvtap_intr(caddr_t arg); 125 126 typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs); 127 static void xpvtap_rs_init(uint_t min_val, uint_t max_val, 128 xpvtap_rs_hdl_t *handle); 129 static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle); 130 static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs); 131 static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs); 132 static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle, 133 xpvtap_rs_cleanup_t callback, void *arg); 134 135 static int xpvtap_segmf_register(xpvtap_state_t *state); 136 static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event); 137 138 static int xpvtap_user_init(xpvtap_state_t *state); 139 static void xpvtap_user_fini(xpvtap_state_t *state); 140 static int xpvtap_user_ring_init(xpvtap_state_t *state); 141 static void xpvtap_user_ring_fini(xpvtap_state_t *state); 142 static int xpvtap_user_thread_init(xpvtap_state_t *state); 143 static void xpvtap_user_thread_fini(xpvtap_state_t *state); 144 static void xpvtap_user_thread_start(caddr_t arg); 145 static void xpvtap_user_thread_stop(xpvtap_state_t *state); 146 static void xpvtap_user_thread(void *arg); 147 148 static void xpvtap_user_app_stop(caddr_t arg); 149 150 static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req, 151 uint_t *uid); 152 static int xpvtap_user_request_push(xpvtap_state_t *state, 153 blkif_request_t *req, uint_t uid); 154 static int xpvtap_user_response_get(xpvtap_state_t *state, 155 blkif_response_t *resp, uint_t *uid); 156 static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid); 157 158 159 /* 160 * _init() 161 */ 162 int 163 _init(void) 164 { 165 int e; 166 167 e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1); 168 if (e != 0) { 169 return (e); 170 } 171 172 e = mod_install(&xpvtap_modlinkage); 173 if (e != 0) { 174 ddi_soft_state_fini(&xpvtap_statep); 175 return (e); 176 } 177 178 return (0); 179 } 180 181 182 /* 183 * _info() 184 */ 185 int 186 _info(struct modinfo *modinfop) 187 { 188 return (mod_info(&xpvtap_modlinkage, modinfop)); 189 } 190 191 192 /* 193 * _fini() 194 */ 195 int 196 _fini(void) 197 { 198 int e; 199 200 e = mod_remove(&xpvtap_modlinkage); 201 if (e != 0) { 202 return (e); 203 } 204 205 ddi_soft_state_fini(&xpvtap_statep); 206 207 return (0); 208 } 209 210 211 /* 212 * xpvtap_attach() 213 */ 214 static int 215 xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 216 { 217 blk_ringinit_args_t args; 218 xpvtap_state_t *state; 219 int instance; 220 int e; 221 222 223 switch (cmd) { 224 case DDI_ATTACH: 225 break; 226 227 case DDI_RESUME: 228 return (DDI_SUCCESS); 229 230 default: 231 return (DDI_FAILURE); 232 } 233 234 /* initialize our state info */ 235 instance = ddi_get_instance(dip); 236 state = xpvtap_drv_init(instance); 237 if (state == NULL) { 238 return (DDI_FAILURE); 239 } 240 state->bt_dip = dip; 241 242 /* Initialize the guest ring */ 243 args.ar_dip = state->bt_dip; 244 args.ar_intr = xpvtap_intr; 245 args.ar_intr_arg = (caddr_t)state; 246 args.ar_ringup = xpvtap_user_thread_start; 247 args.ar_ringup_arg = (caddr_t)state; 248 args.ar_ringdown = xpvtap_user_app_stop; 249 args.ar_ringdown_arg = (caddr_t)state; 250 e = blk_ring_init(&args, &state->bt_guest_ring); 251 if (e != DDI_SUCCESS) { 252 goto attachfail_ringinit; 253 } 254 255 /* create the minor node (for ioctl/mmap) */ 256 e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance, 257 DDI_PSEUDO, 0); 258 if (e != DDI_SUCCESS) { 259 goto attachfail_minor_node; 260 } 261 262 /* Report that driver was loaded */ 263 ddi_report_dev(dip); 264 265 return (DDI_SUCCESS); 266 267 attachfail_minor_node: 268 blk_ring_fini(&state->bt_guest_ring); 269 attachfail_ringinit: 270 xpvtap_drv_fini(state); 271 return (DDI_FAILURE); 272 } 273 274 275 /* 276 * xpvtap_detach() 277 */ 278 static int 279 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 280 { 281 xpvtap_state_t *state; 282 int instance; 283 284 285 instance = ddi_get_instance(dip); 286 state = ddi_get_soft_state(xpvtap_statep, instance); 287 if (state == NULL) { 288 return (DDI_FAILURE); 289 } 290 291 switch (cmd) { 292 case DDI_DETACH: 293 break; 294 295 case DDI_SUSPEND: 296 default: 297 return (DDI_FAILURE); 298 } 299 300 xpvtap_user_thread_stop(state); 301 blk_ring_fini(&state->bt_guest_ring); 302 xpvtap_drv_fini(state); 303 ddi_remove_minor_node(dip, NULL); 304 305 return (DDI_SUCCESS); 306 } 307 308 309 /* 310 * xpvtap_getinfo() 311 */ 312 /*ARGSUSED*/ 313 static int 314 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 315 { 316 xpvtap_state_t *state; 317 int instance; 318 dev_t dev; 319 int e; 320 321 322 dev = (dev_t)arg; 323 instance = getminor(dev); 324 325 switch (cmd) { 326 case DDI_INFO_DEVT2DEVINFO: 327 state = ddi_get_soft_state(xpvtap_statep, instance); 328 if (state == NULL) { 329 return (DDI_FAILURE); 330 } 331 *result = (void *)state->bt_dip; 332 e = DDI_SUCCESS; 333 break; 334 335 case DDI_INFO_DEVT2INSTANCE: 336 *result = (void *)(uintptr_t)instance; 337 e = DDI_SUCCESS; 338 break; 339 340 default: 341 e = DDI_FAILURE; 342 break; 343 } 344 345 return (e); 346 } 347 348 349 /* 350 * xpvtap_open() 351 */ 352 /*ARGSUSED*/ 353 static int 354 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred) 355 { 356 xpvtap_state_t *state; 357 int instance; 358 359 360 if (secpolicy_xvm_control(cred)) { 361 return (EPERM); 362 } 363 364 instance = getminor(*devp); 365 state = ddi_get_soft_state(xpvtap_statep, instance); 366 if (state == NULL) { 367 return (ENXIO); 368 } 369 370 /* we should only be opened once */ 371 mutex_enter(&state->bt_open.bo_mutex); 372 if (state->bt_open.bo_opened) { 373 mutex_exit(&state->bt_open.bo_mutex); 374 return (EBUSY); 375 } 376 state->bt_open.bo_opened = B_TRUE; 377 mutex_exit(&state->bt_open.bo_mutex); 378 379 /* 380 * save the apps address space. need it for mapping/unmapping grefs 381 * since will be doing it in a separate kernel thread. 382 */ 383 state->bt_map.um_as = curproc->p_as; 384 385 return (0); 386 } 387 388 389 /* 390 * xpvtap_close() 391 */ 392 /*ARGSUSED*/ 393 static int 394 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred) 395 { 396 xpvtap_state_t *state; 397 int instance; 398 399 400 instance = getminor(devp); 401 state = ddi_get_soft_state(xpvtap_statep, instance); 402 if (state == NULL) { 403 return (ENXIO); 404 } 405 406 /* 407 * wake thread so it can cleanup and wait for it to exit so we can 408 * be sure it's not in the middle of processing a request/response. 409 */ 410 mutex_enter(&state->bt_thread.ut_mutex); 411 state->bt_thread.ut_wake = B_TRUE; 412 state->bt_thread.ut_exit = B_TRUE; 413 cv_signal(&state->bt_thread.ut_wake_cv); 414 if (!state->bt_thread.ut_exit_done) { 415 cv_wait(&state->bt_thread.ut_exit_done_cv, 416 &state->bt_thread.ut_mutex); 417 } 418 ASSERT(state->bt_thread.ut_exit_done); 419 mutex_exit(&state->bt_thread.ut_mutex); 420 421 state->bt_map.um_as = NULL; 422 state->bt_map.um_guest_pages = NULL; 423 424 /* 425 * when the ring is brought down, a userland hotplug script is run 426 * which tries to bring the userland app down. We'll wait for a bit 427 * for the user app to exit. Notify the thread waiting that the app 428 * has closed the driver. 429 */ 430 mutex_enter(&state->bt_open.bo_mutex); 431 ASSERT(state->bt_open.bo_opened); 432 state->bt_open.bo_opened = B_FALSE; 433 cv_signal(&state->bt_open.bo_exit_cv); 434 mutex_exit(&state->bt_open.bo_mutex); 435 436 return (0); 437 } 438 439 440 /* 441 * xpvtap_ioctl() 442 */ 443 /*ARGSUSED*/ 444 static int 445 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, 446 int *rval) 447 { 448 xpvtap_state_t *state; 449 int instance; 450 451 452 if (secpolicy_xvm_control(cred)) { 453 return (EPERM); 454 } 455 456 instance = getminor(dev); 457 if (instance == -1) { 458 return (EBADF); 459 } 460 461 state = ddi_get_soft_state(xpvtap_statep, instance); 462 if (state == NULL) { 463 return (EBADF); 464 } 465 466 switch (cmd) { 467 case XPVTAP_IOCTL_RESP_PUSH: 468 /* 469 * wake thread, thread handles guest requests and user app 470 * responses. 471 */ 472 mutex_enter(&state->bt_thread.ut_mutex); 473 state->bt_thread.ut_wake = B_TRUE; 474 cv_signal(&state->bt_thread.ut_wake_cv); 475 mutex_exit(&state->bt_thread.ut_mutex); 476 break; 477 478 default: 479 cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd); 480 return (ENXIO); 481 } 482 483 return (0); 484 } 485 486 487 /* 488 * xpvtap_segmap() 489 */ 490 /*ARGSUSED*/ 491 static int 492 xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, 493 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags, 494 cred_t *cred_p) 495 { 496 struct segmf_crargs a; 497 xpvtap_state_t *state; 498 int instance; 499 int e; 500 501 502 if (secpolicy_xvm_control(cred_p)) { 503 return (EPERM); 504 } 505 506 instance = getminor(dev); 507 state = ddi_get_soft_state(xpvtap_statep, instance); 508 if (state == NULL) { 509 return (EBADF); 510 } 511 512 /* the user app should be doing a MAP_SHARED mapping */ 513 if ((flags & MAP_TYPE) != MAP_SHARED) { 514 return (EINVAL); 515 } 516 517 /* 518 * if this is the user ring (offset = 0), devmap it (which ends up in 519 * xpvtap_devmap). devmap will alloc and map the ring into the 520 * app's VA space. 521 */ 522 if (off == 0) { 523 e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len, 524 prot, maxprot, flags, cred_p); 525 return (e); 526 } 527 528 /* this should be the mmap for the gref pages (offset = PAGESIZE) */ 529 if (off != PAGESIZE) { 530 return (EINVAL); 531 } 532 533 /* make sure we get the size we're expecting */ 534 if (len != XPVTAP_GREF_BUFSIZE) { 535 return (EINVAL); 536 } 537 538 /* 539 * reserve user app VA space for the gref pages and use segmf to 540 * manage the backing store for the physical memory. segmf will 541 * map in/out the grefs and fault them in/out. 542 */ 543 ASSERT(asp == state->bt_map.um_as); 544 as_rangelock(asp); 545 if ((flags & MAP_FIXED) == 0) { 546 map_addr(addrp, len, 0, 0, flags); 547 if (*addrp == NULL) { 548 as_rangeunlock(asp); 549 return (ENOMEM); 550 } 551 } else { 552 /* User specified address */ 553 (void) as_unmap(asp, *addrp, len); 554 } 555 a.dev = dev; 556 a.prot = (uchar_t)prot; 557 a.maxprot = (uchar_t)maxprot; 558 e = as_map(asp, *addrp, len, segmf_create, &a); 559 if (e != 0) { 560 as_rangeunlock(asp); 561 return (e); 562 } 563 as_rangeunlock(asp); 564 565 /* 566 * Stash user base address, and compute address where the request 567 * array will end up. 568 */ 569 state->bt_map.um_guest_pages = (caddr_t)*addrp; 570 state->bt_map.um_guest_size = (size_t)len; 571 572 /* register an as callback so we can cleanup when the app goes away */ 573 e = as_add_callback(asp, xpvtap_segmf_unregister, state, 574 AS_UNMAP_EVENT, *addrp, len, KM_SLEEP); 575 if (e != 0) { 576 (void) as_unmap(asp, *addrp, len); 577 return (EINVAL); 578 } 579 580 /* wake thread to see if there are requests already queued up */ 581 mutex_enter(&state->bt_thread.ut_mutex); 582 state->bt_thread.ut_wake = B_TRUE; 583 cv_signal(&state->bt_thread.ut_wake_cv); 584 mutex_exit(&state->bt_thread.ut_mutex); 585 586 return (0); 587 } 588 589 590 /* 591 * xpvtap_devmap() 592 */ 593 /*ARGSUSED*/ 594 static int 595 xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len, 596 size_t *maplen, uint_t model) 597 { 598 xpvtap_user_ring_t *usring; 599 xpvtap_state_t *state; 600 int instance; 601 int e; 602 603 604 instance = getminor(dev); 605 state = ddi_get_soft_state(xpvtap_statep, instance); 606 if (state == NULL) { 607 return (EBADF); 608 } 609 610 /* we should only get here if the offset was == 0 */ 611 if (off != 0) { 612 return (EINVAL); 613 } 614 615 /* we should only be mapping in one page */ 616 if (len != PAGESIZE) { 617 return (EINVAL); 618 } 619 620 /* 621 * we already allocated the user ring during driver attach, all we 622 * need to do is map it into the user app's VA. 623 */ 624 usring = &state->bt_user_ring; 625 e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0, 626 PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL); 627 if (e < 0) { 628 return (e); 629 } 630 631 /* return the size to compete the devmap */ 632 *maplen = PAGESIZE; 633 634 return (0); 635 } 636 637 638 /* 639 * xpvtap_chpoll() 640 */ 641 static int 642 xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 643 struct pollhead **phpp) 644 { 645 xpvtap_user_ring_t *usring; 646 xpvtap_state_t *state; 647 int instance; 648 649 650 instance = getminor(dev); 651 if (instance == -1) { 652 return (EBADF); 653 } 654 state = ddi_get_soft_state(xpvtap_statep, instance); 655 if (state == NULL) { 656 return (EBADF); 657 } 658 659 if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) { 660 return (EINVAL); 661 } 662 663 /* 664 * if we pushed requests on the user ring since the last poll, wakeup 665 * the user app 666 */ 667 *reventsp = 0; 668 usring = &state->bt_user_ring; 669 if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) { 670 671 /* 672 * XXX - is this faster here or xpvtap_user_request_push?? 673 * prelim data says here. Because less membars or because 674 * user thread will spin in poll requests before getting to 675 * responses? 676 */ 677 RING_PUSH_REQUESTS(&usring->ur_ring); 678 679 usring->ur_prod_polled = usring->ur_ring.sring->req_prod; 680 *reventsp = POLLIN | POLLRDNORM; 681 } 682 683 if ((*reventsp == 0 && !anyyet) || (events & POLLET)) { 684 *phpp = &state->bt_pollhead; 685 } 686 687 return (0); 688 } 689 690 691 /* 692 * xpvtap_drv_init() 693 */ 694 static xpvtap_state_t * 695 xpvtap_drv_init(int instance) 696 { 697 xpvtap_state_t *state; 698 int e; 699 700 701 e = ddi_soft_state_zalloc(xpvtap_statep, instance); 702 if (e != DDI_SUCCESS) { 703 return (NULL); 704 } 705 state = ddi_get_soft_state(xpvtap_statep, instance); 706 if (state == NULL) { 707 goto drvinitfail_get_soft_state; 708 } 709 710 state->bt_instance = instance; 711 mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL); 712 cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL); 713 state->bt_open.bo_opened = B_FALSE; 714 state->bt_map.um_registered = B_FALSE; 715 716 /* initialize user ring, thread, mapping state */ 717 e = xpvtap_user_init(state); 718 if (e != DDI_SUCCESS) { 719 goto drvinitfail_userinit; 720 } 721 722 return (state); 723 724 drvinitfail_userinit: 725 cv_destroy(&state->bt_open.bo_exit_cv); 726 mutex_destroy(&state->bt_open.bo_mutex); 727 drvinitfail_get_soft_state: 728 (void) ddi_soft_state_free(xpvtap_statep, instance); 729 return (NULL); 730 } 731 732 733 /* 734 * xpvtap_drv_fini() 735 */ 736 static void 737 xpvtap_drv_fini(xpvtap_state_t *state) 738 { 739 xpvtap_user_fini(state); 740 cv_destroy(&state->bt_open.bo_exit_cv); 741 mutex_destroy(&state->bt_open.bo_mutex); 742 (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance); 743 } 744 745 746 /* 747 * xpvtap_intr() 748 * this routine will be called when we have a request on the guest ring. 749 */ 750 static uint_t 751 xpvtap_intr(caddr_t arg) 752 { 753 xpvtap_state_t *state; 754 755 756 state = (xpvtap_state_t *)arg; 757 758 /* wake thread, thread handles guest requests and user app responses */ 759 mutex_enter(&state->bt_thread.ut_mutex); 760 state->bt_thread.ut_wake = B_TRUE; 761 cv_signal(&state->bt_thread.ut_wake_cv); 762 mutex_exit(&state->bt_thread.ut_mutex); 763 764 return (DDI_INTR_CLAIMED); 765 } 766 767 768 /* 769 * xpvtap_segmf_register() 770 */ 771 static int 772 xpvtap_segmf_register(xpvtap_state_t *state) 773 { 774 struct seg *seg; 775 uint64_t pte_ma; 776 struct as *as; 777 caddr_t uaddr; 778 uint_t pgcnt; 779 int i; 780 781 782 as = state->bt_map.um_as; 783 pgcnt = btopr(state->bt_map.um_guest_size); 784 uaddr = state->bt_map.um_guest_pages; 785 786 if (pgcnt == 0) { 787 return (DDI_FAILURE); 788 } 789 790 AS_LOCK_ENTER(as, RW_READER); 791 792 seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 793 if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) > 794 (seg->s_base + seg->s_size))) { 795 AS_LOCK_EXIT(as); 796 return (DDI_FAILURE); 797 } 798 799 /* 800 * lock down the htables so the HAT can't steal them. Register the 801 * PTE MA's for each gref page with seg_mf so we can do user space 802 * gref mappings. 803 */ 804 for (i = 0; i < pgcnt; i++) { 805 hat_prepare_mapping(as->a_hat, uaddr, &pte_ma); 806 hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0, 807 PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK, 808 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 809 hat_release_mapping(as->a_hat, uaddr); 810 segmf_add_gref_pte(seg, uaddr, pte_ma); 811 uaddr += PAGESIZE; 812 } 813 814 state->bt_map.um_registered = B_TRUE; 815 816 AS_LOCK_EXIT(as); 817 818 return (DDI_SUCCESS); 819 } 820 821 822 /* 823 * xpvtap_segmf_unregister() 824 * as_callback routine 825 */ 826 /*ARGSUSED*/ 827 static void 828 xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event) 829 { 830 xpvtap_state_t *state; 831 caddr_t uaddr; 832 uint_t pgcnt; 833 int i; 834 835 836 state = (xpvtap_state_t *)arg; 837 if (!state->bt_map.um_registered) { 838 /* remove the callback (which is this routine) */ 839 (void) as_delete_callback(as, arg); 840 return; 841 } 842 843 pgcnt = btopr(state->bt_map.um_guest_size); 844 uaddr = state->bt_map.um_guest_pages; 845 846 /* unmap any outstanding req's grefs */ 847 xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state); 848 849 /* Unlock the gref pages */ 850 for (i = 0; i < pgcnt; i++) { 851 AS_LOCK_ENTER(as, RW_WRITER); 852 hat_prepare_mapping(as->a_hat, uaddr, NULL); 853 hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); 854 hat_release_mapping(as->a_hat, uaddr); 855 AS_LOCK_EXIT(as); 856 uaddr += PAGESIZE; 857 } 858 859 /* remove the callback (which is this routine) */ 860 (void) as_delete_callback(as, arg); 861 862 state->bt_map.um_registered = B_FALSE; 863 } 864 865 866 /* 867 * xpvtap_user_init() 868 */ 869 static int 870 xpvtap_user_init(xpvtap_state_t *state) 871 { 872 xpvtap_user_map_t *map; 873 int e; 874 875 876 map = &state->bt_map; 877 878 /* Setup the ring between the driver and user app */ 879 e = xpvtap_user_ring_init(state); 880 if (e != DDI_SUCCESS) { 881 return (DDI_FAILURE); 882 } 883 884 /* 885 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This 886 * is the same number of requests as the guest ring. Initialize the 887 * state we use to track request IDs to the user app. These IDs will 888 * also identify which group of gref pages correspond with the 889 * request. 890 */ 891 xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs); 892 893 /* 894 * allocate the space to store a copy of each outstanding requests. We 895 * will need to reference the ID and the number of segments when we 896 * get the response from the user app. 897 */ 898 map->um_outstanding_reqs = kmem_zalloc( 899 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE, 900 KM_SLEEP); 901 902 /* 903 * initialize the thread we use to process guest requests and user 904 * responses. 905 */ 906 e = xpvtap_user_thread_init(state); 907 if (e != DDI_SUCCESS) { 908 goto userinitfail_user_thread_init; 909 } 910 911 return (DDI_SUCCESS); 912 913 userinitfail_user_thread_init: 914 xpvtap_rs_fini(&map->um_rs); 915 kmem_free(map->um_outstanding_reqs, 916 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE); 917 xpvtap_user_ring_fini(state); 918 return (DDI_FAILURE); 919 } 920 921 922 /* 923 * xpvtap_user_ring_init() 924 */ 925 static int 926 xpvtap_user_ring_init(xpvtap_state_t *state) 927 { 928 xpvtap_user_ring_t *usring; 929 930 931 usring = &state->bt_user_ring; 932 933 /* alocate and initialize the page for the shared user ring */ 934 usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE, 935 DDI_UMEM_SLEEP, &usring->ur_cookie); 936 SHARED_RING_INIT(usring->ur_sring); 937 FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE); 938 usring->ur_prod_polled = 0; 939 940 return (DDI_SUCCESS); 941 } 942 943 944 /* 945 * xpvtap_user_thread_init() 946 */ 947 static int 948 xpvtap_user_thread_init(xpvtap_state_t *state) 949 { 950 xpvtap_user_thread_t *thread; 951 char taskqname[32]; 952 953 954 thread = &state->bt_thread; 955 956 mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL); 957 cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL); 958 cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL); 959 thread->ut_wake = B_FALSE; 960 thread->ut_exit = B_FALSE; 961 thread->ut_exit_done = B_TRUE; 962 963 /* create but don't start the user thread */ 964 (void) sprintf(taskqname, "xvptap_%d", state->bt_instance); 965 thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1, 966 TASKQ_DEFAULTPRI, 0); 967 if (thread->ut_taskq == NULL) { 968 goto userinitthrfail_taskq_create; 969 } 970 971 return (DDI_SUCCESS); 972 973 userinitthrfail_taskq_create: 974 cv_destroy(&thread->ut_exit_done_cv); 975 cv_destroy(&thread->ut_wake_cv); 976 mutex_destroy(&thread->ut_mutex); 977 978 return (DDI_FAILURE); 979 } 980 981 982 /* 983 * xpvtap_user_thread_start() 984 */ 985 static void 986 xpvtap_user_thread_start(caddr_t arg) 987 { 988 xpvtap_user_thread_t *thread; 989 xpvtap_state_t *state; 990 int e; 991 992 993 state = (xpvtap_state_t *)arg; 994 thread = &state->bt_thread; 995 996 /* start the user thread */ 997 thread->ut_exit_done = B_FALSE; 998 e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state, 999 DDI_SLEEP); 1000 if (e != DDI_SUCCESS) { 1001 thread->ut_exit_done = B_TRUE; 1002 cmn_err(CE_WARN, "Unable to start user thread\n"); 1003 } 1004 } 1005 1006 1007 /* 1008 * xpvtap_user_thread_stop() 1009 */ 1010 static void 1011 xpvtap_user_thread_stop(xpvtap_state_t *state) 1012 { 1013 /* wake thread so it can exit */ 1014 mutex_enter(&state->bt_thread.ut_mutex); 1015 state->bt_thread.ut_wake = B_TRUE; 1016 state->bt_thread.ut_exit = B_TRUE; 1017 cv_signal(&state->bt_thread.ut_wake_cv); 1018 if (!state->bt_thread.ut_exit_done) { 1019 cv_wait(&state->bt_thread.ut_exit_done_cv, 1020 &state->bt_thread.ut_mutex); 1021 } 1022 mutex_exit(&state->bt_thread.ut_mutex); 1023 ASSERT(state->bt_thread.ut_exit_done); 1024 } 1025 1026 1027 /* 1028 * xpvtap_user_fini() 1029 */ 1030 static void 1031 xpvtap_user_fini(xpvtap_state_t *state) 1032 { 1033 xpvtap_user_map_t *map; 1034 1035 1036 map = &state->bt_map; 1037 1038 xpvtap_user_thread_fini(state); 1039 xpvtap_rs_fini(&map->um_rs); 1040 kmem_free(map->um_outstanding_reqs, 1041 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE); 1042 xpvtap_user_ring_fini(state); 1043 } 1044 1045 1046 /* 1047 * xpvtap_user_ring_fini() 1048 */ 1049 static void 1050 xpvtap_user_ring_fini(xpvtap_state_t *state) 1051 { 1052 ddi_umem_free(state->bt_user_ring.ur_cookie); 1053 } 1054 1055 1056 /* 1057 * xpvtap_user_thread_fini() 1058 */ 1059 static void 1060 xpvtap_user_thread_fini(xpvtap_state_t *state) 1061 { 1062 ddi_taskq_destroy(state->bt_thread.ut_taskq); 1063 cv_destroy(&state->bt_thread.ut_exit_done_cv); 1064 cv_destroy(&state->bt_thread.ut_wake_cv); 1065 mutex_destroy(&state->bt_thread.ut_mutex); 1066 } 1067 1068 1069 /* 1070 * xpvtap_user_thread() 1071 */ 1072 static void 1073 xpvtap_user_thread(void *arg) 1074 { 1075 xpvtap_user_thread_t *thread; 1076 blkif_response_t resp; 1077 xpvtap_state_t *state; 1078 blkif_request_t req; 1079 boolean_t b; 1080 uint_t uid; 1081 int e; 1082 1083 1084 state = (xpvtap_state_t *)arg; 1085 thread = &state->bt_thread; 1086 1087 xpvtap_thread_start: 1088 /* See if we are supposed to exit */ 1089 mutex_enter(&thread->ut_mutex); 1090 if (thread->ut_exit) { 1091 thread->ut_exit_done = B_TRUE; 1092 cv_signal(&state->bt_thread.ut_exit_done_cv); 1093 mutex_exit(&thread->ut_mutex); 1094 return; 1095 } 1096 1097 /* 1098 * if we aren't supposed to be awake, wait until someone wakes us. 1099 * when we wake up, check for a kill or someone telling us to exit. 1100 */ 1101 if (!thread->ut_wake) { 1102 e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex); 1103 if ((e == 0) || (thread->ut_exit)) { 1104 thread->ut_exit = B_TRUE; 1105 mutex_exit(&thread->ut_mutex); 1106 goto xpvtap_thread_start; 1107 } 1108 } 1109 1110 /* if someone didn't wake us, go back to the start of the thread */ 1111 if (!thread->ut_wake) { 1112 mutex_exit(&thread->ut_mutex); 1113 goto xpvtap_thread_start; 1114 } 1115 1116 /* we are awake */ 1117 thread->ut_wake = B_FALSE; 1118 mutex_exit(&thread->ut_mutex); 1119 1120 /* process requests from the guest */ 1121 do { 1122 /* 1123 * check for requests from the guest. if we don't have any, 1124 * break out of the loop. 1125 */ 1126 e = blk_ring_request_get(state->bt_guest_ring, &req); 1127 if (e == B_FALSE) { 1128 break; 1129 } 1130 1131 /* we got a request, map the grefs into the user app's VA */ 1132 e = xpvtap_user_request_map(state, &req, &uid); 1133 if (e != DDI_SUCCESS) { 1134 /* 1135 * If we couldn't map the request (e.g. user app hasn't 1136 * opened the device yet), requeue it and try again 1137 * later 1138 */ 1139 blk_ring_request_requeue(state->bt_guest_ring); 1140 break; 1141 } 1142 1143 /* push the request to the user app */ 1144 e = xpvtap_user_request_push(state, &req, uid); 1145 if (e != DDI_SUCCESS) { 1146 resp.id = req.id; 1147 resp.operation = req.operation; 1148 resp.status = BLKIF_RSP_ERROR; 1149 blk_ring_response_put(state->bt_guest_ring, &resp); 1150 } 1151 } while (!thread->ut_exit); 1152 1153 /* process reponses from the user app */ 1154 do { 1155 /* 1156 * check for responses from the user app. if we don't have any, 1157 * break out of the loop. 1158 */ 1159 b = xpvtap_user_response_get(state, &resp, &uid); 1160 if (b != B_TRUE) { 1161 break; 1162 } 1163 1164 /* 1165 * if we got a response, unmap the grefs from the matching 1166 * request. 1167 */ 1168 xpvtap_user_request_unmap(state, uid); 1169 1170 /* push the response to the guest */ 1171 blk_ring_response_put(state->bt_guest_ring, &resp); 1172 } while (!thread->ut_exit); 1173 1174 goto xpvtap_thread_start; 1175 } 1176 1177 1178 /* 1179 * xpvtap_user_request_map() 1180 */ 1181 static int 1182 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req, 1183 uint_t *uid) 1184 { 1185 grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 1186 struct seg *seg; 1187 struct as *as; 1188 domid_t domid; 1189 caddr_t uaddr; 1190 uint_t flags; 1191 int i; 1192 int e; 1193 1194 1195 domid = xvdi_get_oeid(state->bt_dip); 1196 1197 as = state->bt_map.um_as; 1198 if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) { 1199 return (DDI_FAILURE); 1200 } 1201 1202 /* has to happen after segmap returns */ 1203 if (!state->bt_map.um_registered) { 1204 /* register the pte's with segmf */ 1205 e = xpvtap_segmf_register(state); 1206 if (e != DDI_SUCCESS) { 1207 return (DDI_FAILURE); 1208 } 1209 } 1210 1211 /* alloc an ID for the user ring */ 1212 e = xpvtap_rs_alloc(state->bt_map.um_rs, uid); 1213 if (e != DDI_SUCCESS) { 1214 return (DDI_FAILURE); 1215 } 1216 1217 /* if we don't have any segments to map, we're done */ 1218 if ((req->operation == BLKIF_OP_WRITE_BARRIER) || 1219 (req->operation == BLKIF_OP_FLUSH_DISKCACHE) || 1220 (req->nr_segments == 0)) { 1221 return (DDI_SUCCESS); 1222 } 1223 1224 /* get the apps gref address */ 1225 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid); 1226 1227 AS_LOCK_ENTER(as, RW_READER); 1228 seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 1229 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) > 1230 (seg->s_base + seg->s_size))) { 1231 AS_LOCK_EXIT(as); 1232 return (DDI_FAILURE); 1233 } 1234 1235 /* if we are reading from disk, we are writing into memory */ 1236 flags = 0; 1237 if (req->operation == BLKIF_OP_READ) { 1238 flags |= SEGMF_GREF_WR; 1239 } 1240 1241 /* Load the grefs into seg_mf */ 1242 for (i = 0; i < req->nr_segments; i++) { 1243 gref[i] = req->seg[i].gref; 1244 } 1245 (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments, 1246 domid); 1247 1248 AS_LOCK_EXIT(as); 1249 1250 return (DDI_SUCCESS); 1251 } 1252 1253 1254 /* 1255 * xpvtap_user_request_push() 1256 */ 1257 static int 1258 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req, 1259 uint_t uid) 1260 { 1261 blkif_request_t *outstanding_req; 1262 blkif_front_ring_t *uring; 1263 blkif_request_t *target; 1264 xpvtap_user_map_t *map; 1265 1266 1267 uring = &state->bt_user_ring.ur_ring; 1268 map = &state->bt_map; 1269 1270 target = RING_GET_REQUEST(uring, uring->req_prod_pvt); 1271 1272 /* 1273 * Save request from the frontend. used for ID mapping and unmap 1274 * on response/cleanup 1275 */ 1276 outstanding_req = &map->um_outstanding_reqs[uid]; 1277 bcopy(req, outstanding_req, sizeof (*outstanding_req)); 1278 1279 /* put the request on the user ring */ 1280 bcopy(req, target, sizeof (*req)); 1281 target->id = (uint64_t)uid; 1282 uring->req_prod_pvt++; 1283 1284 pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM); 1285 1286 return (DDI_SUCCESS); 1287 } 1288 1289 1290 static void 1291 xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid) 1292 { 1293 blkif_request_t *req; 1294 struct seg *seg; 1295 struct as *as; 1296 caddr_t uaddr; 1297 int e; 1298 1299 1300 as = state->bt_map.um_as; 1301 if (as == NULL) { 1302 return; 1303 } 1304 1305 /* get a copy of the original request */ 1306 req = &state->bt_map.um_outstanding_reqs[uid]; 1307 1308 /* unmap the grefs for this request */ 1309 if ((req->operation != BLKIF_OP_WRITE_BARRIER) && 1310 (req->operation != BLKIF_OP_FLUSH_DISKCACHE) && 1311 (req->nr_segments != 0)) { 1312 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid); 1313 AS_LOCK_ENTER(as, RW_READER); 1314 seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 1315 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) > 1316 (seg->s_base + seg->s_size))) { 1317 AS_LOCK_EXIT(as); 1318 xpvtap_rs_free(state->bt_map.um_rs, uid); 1319 return; 1320 } 1321 1322 e = segmf_release_grefs(seg, uaddr, req->nr_segments); 1323 if (e != 0) { 1324 cmn_err(CE_WARN, "unable to release grefs"); 1325 } 1326 1327 AS_LOCK_EXIT(as); 1328 } 1329 1330 /* free up the user ring id */ 1331 xpvtap_rs_free(state->bt_map.um_rs, uid); 1332 } 1333 1334 1335 static int 1336 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp, 1337 uint_t *uid) 1338 { 1339 blkif_front_ring_t *uring; 1340 blkif_response_t *target; 1341 1342 1343 uring = &state->bt_user_ring.ur_ring; 1344 1345 if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) { 1346 return (B_FALSE); 1347 } 1348 1349 target = NULL; 1350 target = RING_GET_RESPONSE(uring, uring->rsp_cons); 1351 if (target == NULL) { 1352 return (B_FALSE); 1353 } 1354 1355 /* copy out the user app response */ 1356 bcopy(target, resp, sizeof (*resp)); 1357 uring->rsp_cons++; 1358 1359 /* restore the quests id from the original request */ 1360 *uid = (uint_t)resp->id; 1361 resp->id = state->bt_map.um_outstanding_reqs[*uid].id; 1362 1363 return (B_TRUE); 1364 } 1365 1366 1367 /* 1368 * xpvtap_user_app_stop() 1369 */ 1370 static void xpvtap_user_app_stop(caddr_t arg) 1371 { 1372 xpvtap_state_t *state; 1373 clock_t rc; 1374 1375 state = (xpvtap_state_t *)arg; 1376 1377 /* 1378 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious 1379 * problem, we just won't auto-detach the driver. 1380 */ 1381 mutex_enter(&state->bt_open.bo_mutex); 1382 if (state->bt_open.bo_opened) { 1383 rc = cv_reltimedwait(&state->bt_open.bo_exit_cv, 1384 &state->bt_open.bo_mutex, drv_usectohz(10000000), 1385 TR_CLOCK_TICK); 1386 if (rc <= 0) { 1387 cmn_err(CE_NOTE, "!user process still has driver open, " 1388 "deferring detach\n"); 1389 } 1390 } 1391 mutex_exit(&state->bt_open.bo_mutex); 1392 } 1393 1394 1395 /* 1396 * xpvtap_rs_init() 1397 * Initialize the resource structure. init() returns a handle to be used 1398 * for the rest of the resource functions. This code is written assuming 1399 * that min_val will be close to 0. Therefore, we will allocate the free 1400 * buffer only taking max_val into account. 1401 */ 1402 static void 1403 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle) 1404 { 1405 xpvtap_rs_t *rstruct; 1406 uint_t array_size; 1407 uint_t index; 1408 1409 1410 ASSERT(handle != NULL); 1411 ASSERT(min_val < max_val); 1412 1413 /* alloc space for resource structure */ 1414 rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP); 1415 1416 /* 1417 * Test to see if the max value is 64-bit aligned. If so, we don't need 1418 * to allocate an extra 64-bit word. alloc space for free buffer 1419 * (8 bytes per uint64_t). 1420 */ 1421 if ((max_val & 0x3F) == 0) { 1422 rstruct->rs_free_size = (max_val >> 6) * 8; 1423 } else { 1424 rstruct->rs_free_size = ((max_val >> 6) + 1) * 8; 1425 } 1426 rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP); 1427 1428 /* Initialize resource structure */ 1429 rstruct->rs_min = min_val; 1430 rstruct->rs_last = min_val; 1431 rstruct->rs_max = max_val; 1432 mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL); 1433 rstruct->rs_flushing = B_FALSE; 1434 1435 /* Mark all resources as free */ 1436 array_size = rstruct->rs_free_size >> 3; 1437 for (index = 0; index < array_size; index++) { 1438 rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF; 1439 } 1440 1441 /* setup handle which is returned from this function */ 1442 *handle = rstruct; 1443 } 1444 1445 1446 /* 1447 * xpvtap_rs_fini() 1448 * Frees up the space allocated in init(). Notice that a pointer to the 1449 * handle is used for the parameter. fini() will set the handle to NULL 1450 * before returning. 1451 */ 1452 static void 1453 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle) 1454 { 1455 xpvtap_rs_t *rstruct; 1456 1457 1458 ASSERT(handle != NULL); 1459 1460 rstruct = (xpvtap_rs_t *)*handle; 1461 1462 mutex_destroy(&rstruct->rs_mutex); 1463 kmem_free(rstruct->rs_free, rstruct->rs_free_size); 1464 kmem_free(rstruct, sizeof (xpvtap_rs_t)); 1465 1466 /* set handle to null. This helps catch bugs. */ 1467 *handle = NULL; 1468 } 1469 1470 1471 /* 1472 * xpvtap_rs_alloc() 1473 * alloc a resource. If alloc fails, we are out of resources. 1474 */ 1475 static int 1476 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource) 1477 { 1478 xpvtap_rs_t *rstruct; 1479 uint_t array_idx; 1480 uint64_t free; 1481 uint_t index; 1482 uint_t last; 1483 uint_t min; 1484 uint_t max; 1485 1486 1487 ASSERT(handle != NULL); 1488 ASSERT(resource != NULL); 1489 1490 rstruct = (xpvtap_rs_t *)handle; 1491 1492 mutex_enter(&rstruct->rs_mutex); 1493 min = rstruct->rs_min; 1494 max = rstruct->rs_max; 1495 1496 /* 1497 * Find a free resource. This will return out of the loop once it finds 1498 * a free resource. There are a total of 'max'-'min'+1 resources. 1499 * Performs a round robin allocation. 1500 */ 1501 for (index = min; index <= max; index++) { 1502 1503 array_idx = rstruct->rs_last >> 6; 1504 free = rstruct->rs_free[array_idx]; 1505 last = rstruct->rs_last & 0x3F; 1506 1507 /* if the next resource to check is free */ 1508 if ((free & ((uint64_t)1 << last)) != 0) { 1509 /* we are using this resource */ 1510 *resource = rstruct->rs_last; 1511 1512 /* take it out of the free list */ 1513 rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last); 1514 1515 /* 1516 * increment the last count so we start checking the 1517 * next resource on the next alloc(). Note the rollover 1518 * at 'max'+1. 1519 */ 1520 rstruct->rs_last++; 1521 if (rstruct->rs_last > max) { 1522 rstruct->rs_last = rstruct->rs_min; 1523 } 1524 1525 /* unlock the resource structure */ 1526 mutex_exit(&rstruct->rs_mutex); 1527 1528 return (DDI_SUCCESS); 1529 } 1530 1531 /* 1532 * This resource is not free, lets go to the next one. Note the 1533 * rollover at 'max'. 1534 */ 1535 rstruct->rs_last++; 1536 if (rstruct->rs_last > max) { 1537 rstruct->rs_last = rstruct->rs_min; 1538 } 1539 } 1540 1541 mutex_exit(&rstruct->rs_mutex); 1542 1543 return (DDI_FAILURE); 1544 } 1545 1546 1547 /* 1548 * xpvtap_rs_free() 1549 * Free the previously alloc'd resource. Once a resource has been free'd, 1550 * it can be used again when alloc is called. 1551 */ 1552 static void 1553 xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource) 1554 { 1555 xpvtap_rs_t *rstruct; 1556 uint_t array_idx; 1557 uint_t offset; 1558 1559 1560 ASSERT(handle != NULL); 1561 1562 rstruct = (xpvtap_rs_t *)handle; 1563 ASSERT(resource >= rstruct->rs_min); 1564 ASSERT(resource <= rstruct->rs_max); 1565 1566 if (!rstruct->rs_flushing) { 1567 mutex_enter(&rstruct->rs_mutex); 1568 } 1569 1570 /* Put the resource back in the free list */ 1571 array_idx = resource >> 6; 1572 offset = resource & 0x3F; 1573 rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset); 1574 1575 if (!rstruct->rs_flushing) { 1576 mutex_exit(&rstruct->rs_mutex); 1577 } 1578 } 1579 1580 1581 /* 1582 * xpvtap_rs_flush() 1583 */ 1584 static void 1585 xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback, 1586 void *arg) 1587 { 1588 xpvtap_rs_t *rstruct; 1589 uint_t array_idx; 1590 uint64_t free; 1591 uint_t index; 1592 uint_t last; 1593 uint_t min; 1594 uint_t max; 1595 1596 1597 ASSERT(handle != NULL); 1598 1599 rstruct = (xpvtap_rs_t *)handle; 1600 1601 mutex_enter(&rstruct->rs_mutex); 1602 min = rstruct->rs_min; 1603 max = rstruct->rs_max; 1604 1605 rstruct->rs_flushing = B_TRUE; 1606 1607 /* 1608 * for all resources not free, call the callback routine to clean it 1609 * up. 1610 */ 1611 for (index = min; index <= max; index++) { 1612 1613 array_idx = rstruct->rs_last >> 6; 1614 free = rstruct->rs_free[array_idx]; 1615 last = rstruct->rs_last & 0x3F; 1616 1617 /* if the next resource to check is not free */ 1618 if ((free & ((uint64_t)1 << last)) == 0) { 1619 /* call the callback to cleanup */ 1620 (*callback)(arg, rstruct->rs_last); 1621 1622 /* put it back in the free list */ 1623 rstruct->rs_free[array_idx] |= ((uint64_t)1 << last); 1624 } 1625 1626 /* go to the next one. Note the rollover at 'max' */ 1627 rstruct->rs_last++; 1628 if (rstruct->rs_last > max) { 1629 rstruct->rs_last = rstruct->rs_min; 1630 } 1631 } 1632 1633 mutex_exit(&rstruct->rs_mutex); 1634 } 1635