1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 28 #include <sys/errno.h> 29 #include <sys/types.h> 30 #include <sys/conf.h> 31 #include <sys/kmem.h> 32 #include <sys/ddi.h> 33 #include <sys/stat.h> 34 #include <sys/sunddi.h> 35 #include <sys/file.h> 36 #include <sys/open.h> 37 #include <sys/modctl.h> 38 #include <sys/ddi_impldefs.h> 39 #include <sys/sysmacros.h> 40 #include <sys/ddidevmap.h> 41 #include <sys/policy.h> 42 43 #include <sys/vmsystm.h> 44 #include <vm/hat_i86.h> 45 #include <vm/hat_pte.h> 46 #include <vm/seg_kmem.h> 47 #include <vm/seg_mf.h> 48 49 #include <xen/io/blkif_impl.h> 50 #include <xen/io/blk_common.h> 51 #include <xen/io/xpvtap.h> 52 53 54 static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 55 static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred); 56 static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 57 cred_t *cred, int *rval); 58 static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, 59 size_t len, size_t *maplen, uint_t model); 60 static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, 61 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags, 62 cred_t *cred_p); 63 static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 64 struct pollhead **phpp); 65 66 static struct cb_ops xpvtap_cb_ops = { 67 xpvtap_open, /* cb_open */ 68 xpvtap_close, /* cb_close */ 69 nodev, /* cb_strategy */ 70 nodev, /* cb_print */ 71 nodev, /* cb_dump */ 72 nodev, /* cb_read */ 73 nodev, /* cb_write */ 74 xpvtap_ioctl, /* cb_ioctl */ 75 xpvtap_devmap, /* cb_devmap */ 76 nodev, /* cb_mmap */ 77 xpvtap_segmap, /* cb_segmap */ 78 xpvtap_chpoll, /* cb_chpoll */ 79 ddi_prop_op, /* cb_prop_op */ 80 NULL, /* cb_stream */ 81 D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */ 82 CB_REV 83 }; 84 85 static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, 86 void **result); 87 static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd); 88 static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd); 89 90 static struct dev_ops xpvtap_dev_ops = { 91 DEVO_REV, /* devo_rev */ 92 0, /* devo_refcnt */ 93 xpvtap_getinfo, /* devo_getinfo */ 94 nulldev, /* devo_identify */ 95 nulldev, /* devo_probe */ 96 xpvtap_attach, /* devo_attach */ 97 xpvtap_detach, /* devo_detach */ 98 nodev, /* devo_reset */ 99 &xpvtap_cb_ops, /* devo_cb_ops */ 100 NULL, /* devo_bus_ops */ 101 NULL /* power */ 102 }; 103 104 105 static struct modldrv xpvtap_modldrv = { 106 &mod_driverops, /* Type of module. This one is a driver */ 107 "xpvtap driver", /* Name of the module. */ 108 &xpvtap_dev_ops, /* driver ops */ 109 }; 110 111 static struct modlinkage xpvtap_modlinkage = { 112 MODREV_1, 113 (void *) &xpvtap_modldrv, 114 NULL 115 }; 116 117 118 void *xpvtap_statep; 119 120 121 static xpvtap_state_t *xpvtap_drv_init(int instance); 122 static void xpvtap_drv_fini(xpvtap_state_t *state); 123 static uint_t xpvtap_intr(caddr_t arg); 124 125 typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs); 126 static void xpvtap_rs_init(uint_t min_val, uint_t max_val, 127 xpvtap_rs_hdl_t *handle); 128 static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle); 129 static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs); 130 static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs); 131 static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle, 132 xpvtap_rs_cleanup_t callback, void *arg); 133 134 static int xpvtap_segmf_register(xpvtap_state_t *state); 135 static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event); 136 137 static int xpvtap_user_init(xpvtap_state_t *state); 138 static void xpvtap_user_fini(xpvtap_state_t *state); 139 static int xpvtap_user_ring_init(xpvtap_state_t *state); 140 static void xpvtap_user_ring_fini(xpvtap_state_t *state); 141 static int xpvtap_user_thread_init(xpvtap_state_t *state); 142 static void xpvtap_user_thread_fini(xpvtap_state_t *state); 143 static void xpvtap_user_thread_start(caddr_t arg); 144 static void xpvtap_user_thread_stop(xpvtap_state_t *state); 145 static void xpvtap_user_thread(void *arg); 146 147 static void xpvtap_user_app_stop(caddr_t arg); 148 149 static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req, 150 uint_t *uid); 151 static int xpvtap_user_request_push(xpvtap_state_t *state, 152 blkif_request_t *req, uint_t uid); 153 static int xpvtap_user_response_get(xpvtap_state_t *state, 154 blkif_response_t *resp, uint_t *uid); 155 static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid); 156 157 158 /* 159 * _init() 160 */ 161 int 162 _init(void) 163 { 164 int e; 165 166 e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1); 167 if (e != 0) { 168 return (e); 169 } 170 171 e = mod_install(&xpvtap_modlinkage); 172 if (e != 0) { 173 ddi_soft_state_fini(&xpvtap_statep); 174 return (e); 175 } 176 177 return (0); 178 } 179 180 181 /* 182 * _info() 183 */ 184 int 185 _info(struct modinfo *modinfop) 186 { 187 return (mod_info(&xpvtap_modlinkage, modinfop)); 188 } 189 190 191 /* 192 * _fini() 193 */ 194 int 195 _fini(void) 196 { 197 int e; 198 199 e = mod_remove(&xpvtap_modlinkage); 200 if (e != 0) { 201 return (e); 202 } 203 204 ddi_soft_state_fini(&xpvtap_statep); 205 206 return (0); 207 } 208 209 210 /* 211 * xpvtap_attach() 212 */ 213 static int 214 xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 215 { 216 blk_ringinit_args_t args; 217 xpvtap_state_t *state; 218 int instance; 219 int e; 220 221 222 switch (cmd) { 223 case DDI_ATTACH: 224 break; 225 226 case DDI_RESUME: 227 return (DDI_SUCCESS); 228 229 default: 230 return (DDI_FAILURE); 231 } 232 233 /* initialize our state info */ 234 instance = ddi_get_instance(dip); 235 state = xpvtap_drv_init(instance); 236 if (state == NULL) { 237 return (DDI_FAILURE); 238 } 239 state->bt_dip = dip; 240 241 /* Initialize the guest ring */ 242 args.ar_dip = state->bt_dip; 243 args.ar_intr = xpvtap_intr; 244 args.ar_intr_arg = (caddr_t)state; 245 args.ar_ringup = xpvtap_user_thread_start; 246 args.ar_ringup_arg = (caddr_t)state; 247 args.ar_ringdown = xpvtap_user_app_stop; 248 args.ar_ringdown_arg = (caddr_t)state; 249 e = blk_ring_init(&args, &state->bt_guest_ring); 250 if (e != DDI_SUCCESS) { 251 goto attachfail_ringinit; 252 } 253 254 /* create the minor node (for ioctl/mmap) */ 255 e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance, 256 DDI_PSEUDO, 0); 257 if (e != DDI_SUCCESS) { 258 goto attachfail_minor_node; 259 } 260 261 /* Report that driver was loaded */ 262 ddi_report_dev(dip); 263 264 return (DDI_SUCCESS); 265 266 attachfail_minor_node: 267 blk_ring_fini(&state->bt_guest_ring); 268 attachfail_ringinit: 269 xpvtap_drv_fini(state); 270 return (DDI_FAILURE); 271 } 272 273 274 /* 275 * xpvtap_detach() 276 */ 277 static int 278 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 279 { 280 xpvtap_state_t *state; 281 int instance; 282 283 284 instance = ddi_get_instance(dip); 285 state = ddi_get_soft_state(xpvtap_statep, instance); 286 if (state == NULL) { 287 return (DDI_FAILURE); 288 } 289 290 switch (cmd) { 291 case DDI_DETACH: 292 break; 293 294 case DDI_SUSPEND: 295 default: 296 return (DDI_FAILURE); 297 } 298 299 xpvtap_user_thread_stop(state); 300 blk_ring_fini(&state->bt_guest_ring); 301 xpvtap_drv_fini(state); 302 ddi_remove_minor_node(dip, NULL); 303 304 return (DDI_SUCCESS); 305 } 306 307 308 /* 309 * xpvtap_getinfo() 310 */ 311 /*ARGSUSED*/ 312 static int 313 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 314 { 315 xpvtap_state_t *state; 316 int instance; 317 dev_t dev; 318 int e; 319 320 321 dev = (dev_t)arg; 322 instance = getminor(dev); 323 324 switch (cmd) { 325 case DDI_INFO_DEVT2DEVINFO: 326 state = ddi_get_soft_state(xpvtap_statep, instance); 327 if (state == NULL) { 328 return (DDI_FAILURE); 329 } 330 *result = (void *)state->bt_dip; 331 e = DDI_SUCCESS; 332 break; 333 334 case DDI_INFO_DEVT2INSTANCE: 335 *result = (void *)(uintptr_t)instance; 336 e = DDI_SUCCESS; 337 break; 338 339 default: 340 e = DDI_FAILURE; 341 break; 342 } 343 344 return (e); 345 } 346 347 348 /* 349 * xpvtap_open() 350 */ 351 /*ARGSUSED*/ 352 static int 353 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred) 354 { 355 xpvtap_state_t *state; 356 int instance; 357 358 359 if (secpolicy_xvm_control(cred)) { 360 return (EPERM); 361 } 362 363 instance = getminor(*devp); 364 state = ddi_get_soft_state(xpvtap_statep, instance); 365 if (state == NULL) { 366 return (ENXIO); 367 } 368 369 /* we should only be opened once */ 370 mutex_enter(&state->bt_open.bo_mutex); 371 if (state->bt_open.bo_opened) { 372 mutex_exit(&state->bt_open.bo_mutex); 373 return (EBUSY); 374 } 375 state->bt_open.bo_opened = B_TRUE; 376 mutex_exit(&state->bt_open.bo_mutex); 377 378 /* 379 * save the apps address space. need it for mapping/unmapping grefs 380 * since will be doing it in a separate kernel thread. 381 */ 382 state->bt_map.um_as = curproc->p_as; 383 384 return (0); 385 } 386 387 388 /* 389 * xpvtap_close() 390 */ 391 /*ARGSUSED*/ 392 static int 393 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred) 394 { 395 xpvtap_state_t *state; 396 int instance; 397 398 399 instance = getminor(devp); 400 state = ddi_get_soft_state(xpvtap_statep, instance); 401 if (state == NULL) { 402 return (ENXIO); 403 } 404 405 /* 406 * wake thread so it can cleanup and wait for it to exit so we can 407 * be sure it's not in the middle of processing a request/response. 408 */ 409 mutex_enter(&state->bt_thread.ut_mutex); 410 state->bt_thread.ut_wake = B_TRUE; 411 state->bt_thread.ut_exit = B_TRUE; 412 cv_signal(&state->bt_thread.ut_wake_cv); 413 if (!state->bt_thread.ut_exit_done) { 414 cv_wait(&state->bt_thread.ut_exit_done_cv, 415 &state->bt_thread.ut_mutex); 416 } 417 ASSERT(state->bt_thread.ut_exit_done); 418 mutex_exit(&state->bt_thread.ut_mutex); 419 420 state->bt_map.um_as = NULL; 421 state->bt_map.um_guest_pages = NULL; 422 423 /* 424 * when the ring is brought down, a userland hotplug script is run 425 * which tries to bring the userland app down. We'll wait for a bit 426 * for the user app to exit. Notify the thread waiting that the app 427 * has closed the driver. 428 */ 429 mutex_enter(&state->bt_open.bo_mutex); 430 ASSERT(state->bt_open.bo_opened); 431 state->bt_open.bo_opened = B_FALSE; 432 cv_signal(&state->bt_open.bo_exit_cv); 433 mutex_exit(&state->bt_open.bo_mutex); 434 435 return (0); 436 } 437 438 439 /* 440 * xpvtap_ioctl() 441 */ 442 /*ARGSUSED*/ 443 static int 444 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, 445 int *rval) 446 { 447 xpvtap_state_t *state; 448 int instance; 449 450 451 if (secpolicy_xvm_control(cred)) { 452 return (EPERM); 453 } 454 455 instance = getminor(dev); 456 if (instance == -1) { 457 return (EBADF); 458 } 459 460 state = ddi_get_soft_state(xpvtap_statep, instance); 461 if (state == NULL) { 462 return (EBADF); 463 } 464 465 switch (cmd) { 466 case XPVTAP_IOCTL_RESP_PUSH: 467 /* 468 * wake thread, thread handles guest requests and user app 469 * responses. 470 */ 471 mutex_enter(&state->bt_thread.ut_mutex); 472 state->bt_thread.ut_wake = B_TRUE; 473 cv_signal(&state->bt_thread.ut_wake_cv); 474 mutex_exit(&state->bt_thread.ut_mutex); 475 break; 476 477 default: 478 cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd); 479 return (ENXIO); 480 } 481 482 return (0); 483 } 484 485 486 /* 487 * xpvtap_segmap() 488 */ 489 /*ARGSUSED*/ 490 static int 491 xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, 492 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags, 493 cred_t *cred_p) 494 { 495 struct segmf_crargs a; 496 xpvtap_state_t *state; 497 int instance; 498 int e; 499 500 501 if (secpolicy_xvm_control(cred_p)) { 502 return (EPERM); 503 } 504 505 instance = getminor(dev); 506 state = ddi_get_soft_state(xpvtap_statep, instance); 507 if (state == NULL) { 508 return (EBADF); 509 } 510 511 /* the user app should be doing a MAP_SHARED mapping */ 512 if ((flags & MAP_TYPE) != MAP_SHARED) { 513 return (EINVAL); 514 } 515 516 /* 517 * if this is the user ring (offset = 0), devmap it (which ends up in 518 * xpvtap_devmap). devmap will alloc and map the ring into the 519 * app's VA space. 520 */ 521 if (off == 0) { 522 e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len, 523 prot, maxprot, flags, cred_p); 524 return (e); 525 } 526 527 /* this should be the mmap for the gref pages (offset = PAGESIZE) */ 528 if (off != PAGESIZE) { 529 return (EINVAL); 530 } 531 532 /* make sure we get the size we're expecting */ 533 if (len != XPVTAP_GREF_BUFSIZE) { 534 return (EINVAL); 535 } 536 537 /* 538 * reserve user app VA space for the gref pages and use segmf to 539 * manage the backing store for the physical memory. segmf will 540 * map in/out the grefs and fault them in/out. 541 */ 542 ASSERT(asp == state->bt_map.um_as); 543 as_rangelock(asp); 544 if ((flags & MAP_FIXED) == 0) { 545 map_addr(addrp, len, 0, 0, flags); 546 if (*addrp == NULL) { 547 as_rangeunlock(asp); 548 return (ENOMEM); 549 } 550 } else { 551 /* User specified address */ 552 (void) as_unmap(asp, *addrp, len); 553 } 554 a.dev = dev; 555 a.prot = (uchar_t)prot; 556 a.maxprot = (uchar_t)maxprot; 557 e = as_map(asp, *addrp, len, segmf_create, &a); 558 if (e != 0) { 559 as_rangeunlock(asp); 560 return (e); 561 } 562 as_rangeunlock(asp); 563 564 /* 565 * Stash user base address, and compute address where the request 566 * array will end up. 567 */ 568 state->bt_map.um_guest_pages = (caddr_t)*addrp; 569 state->bt_map.um_guest_size = (size_t)len; 570 571 /* register an as callback so we can cleanup when the app goes away */ 572 e = as_add_callback(asp, xpvtap_segmf_unregister, state, 573 AS_UNMAP_EVENT, *addrp, len, KM_SLEEP); 574 if (e != 0) { 575 (void) as_unmap(asp, *addrp, len); 576 return (EINVAL); 577 } 578 579 /* wake thread to see if there are requests already queued up */ 580 mutex_enter(&state->bt_thread.ut_mutex); 581 state->bt_thread.ut_wake = B_TRUE; 582 cv_signal(&state->bt_thread.ut_wake_cv); 583 mutex_exit(&state->bt_thread.ut_mutex); 584 585 return (0); 586 } 587 588 589 /* 590 * xpvtap_devmap() 591 */ 592 /*ARGSUSED*/ 593 static int 594 xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len, 595 size_t *maplen, uint_t model) 596 { 597 xpvtap_user_ring_t *usring; 598 xpvtap_state_t *state; 599 int instance; 600 int e; 601 602 603 instance = getminor(dev); 604 state = ddi_get_soft_state(xpvtap_statep, instance); 605 if (state == NULL) { 606 return (EBADF); 607 } 608 609 /* we should only get here if the offset was == 0 */ 610 if (off != 0) { 611 return (EINVAL); 612 } 613 614 /* we should only be mapping in one page */ 615 if (len != PAGESIZE) { 616 return (EINVAL); 617 } 618 619 /* 620 * we already allocated the user ring during driver attach, all we 621 * need to do is map it into the user app's VA. 622 */ 623 usring = &state->bt_user_ring; 624 e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0, 625 PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL); 626 if (e < 0) { 627 return (e); 628 } 629 630 /* return the size to compete the devmap */ 631 *maplen = PAGESIZE; 632 633 return (0); 634 } 635 636 637 /* 638 * xpvtap_chpoll() 639 */ 640 static int 641 xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 642 struct pollhead **phpp) 643 { 644 xpvtap_user_ring_t *usring; 645 xpvtap_state_t *state; 646 int instance; 647 648 649 instance = getminor(dev); 650 if (instance == -1) { 651 return (EBADF); 652 } 653 state = ddi_get_soft_state(xpvtap_statep, instance); 654 if (state == NULL) { 655 return (EBADF); 656 } 657 658 if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) { 659 *reventsp = 0; 660 return (EINVAL); 661 } 662 663 /* 664 * if we pushed requests on the user ring since the last poll, wakeup 665 * the user app 666 */ 667 usring = &state->bt_user_ring; 668 if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) { 669 670 /* 671 * XXX - is this faster here or xpvtap_user_request_push?? 672 * prelim data says here. Because less membars or because 673 * user thread will spin in poll requests before getting to 674 * responses? 675 */ 676 RING_PUSH_REQUESTS(&usring->ur_ring); 677 678 usring->ur_prod_polled = usring->ur_ring.sring->req_prod; 679 *reventsp = POLLIN | POLLRDNORM; 680 681 /* no new requests */ 682 } else { 683 *reventsp = 0; 684 if (!anyyet) { 685 *phpp = &state->bt_pollhead; 686 } 687 } 688 689 return (0); 690 } 691 692 693 /* 694 * xpvtap_drv_init() 695 */ 696 static xpvtap_state_t * 697 xpvtap_drv_init(int instance) 698 { 699 xpvtap_state_t *state; 700 int e; 701 702 703 e = ddi_soft_state_zalloc(xpvtap_statep, instance); 704 if (e != DDI_SUCCESS) { 705 return (NULL); 706 } 707 state = ddi_get_soft_state(xpvtap_statep, instance); 708 if (state == NULL) { 709 goto drvinitfail_get_soft_state; 710 } 711 712 state->bt_instance = instance; 713 mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL); 714 cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL); 715 state->bt_open.bo_opened = B_FALSE; 716 state->bt_map.um_registered = B_FALSE; 717 718 /* initialize user ring, thread, mapping state */ 719 e = xpvtap_user_init(state); 720 if (e != DDI_SUCCESS) { 721 goto drvinitfail_userinit; 722 } 723 724 return (state); 725 726 drvinitfail_userinit: 727 cv_destroy(&state->bt_open.bo_exit_cv); 728 mutex_destroy(&state->bt_open.bo_mutex); 729 drvinitfail_get_soft_state: 730 (void) ddi_soft_state_free(xpvtap_statep, instance); 731 return (NULL); 732 } 733 734 735 /* 736 * xpvtap_drv_fini() 737 */ 738 static void 739 xpvtap_drv_fini(xpvtap_state_t *state) 740 { 741 xpvtap_user_fini(state); 742 cv_destroy(&state->bt_open.bo_exit_cv); 743 mutex_destroy(&state->bt_open.bo_mutex); 744 (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance); 745 } 746 747 748 /* 749 * xpvtap_intr() 750 * this routine will be called when we have a request on the guest ring. 751 */ 752 static uint_t 753 xpvtap_intr(caddr_t arg) 754 { 755 xpvtap_state_t *state; 756 757 758 state = (xpvtap_state_t *)arg; 759 760 /* wake thread, thread handles guest requests and user app responses */ 761 mutex_enter(&state->bt_thread.ut_mutex); 762 state->bt_thread.ut_wake = B_TRUE; 763 cv_signal(&state->bt_thread.ut_wake_cv); 764 mutex_exit(&state->bt_thread.ut_mutex); 765 766 return (DDI_INTR_CLAIMED); 767 } 768 769 770 /* 771 * xpvtap_segmf_register() 772 */ 773 static int 774 xpvtap_segmf_register(xpvtap_state_t *state) 775 { 776 struct seg *seg; 777 uint64_t pte_ma; 778 struct as *as; 779 caddr_t uaddr; 780 uint_t pgcnt; 781 int i; 782 783 784 as = state->bt_map.um_as; 785 pgcnt = btopr(state->bt_map.um_guest_size); 786 uaddr = state->bt_map.um_guest_pages; 787 788 if (pgcnt == 0) { 789 return (DDI_FAILURE); 790 } 791 792 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 793 794 seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 795 if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) > 796 (seg->s_base + seg->s_size))) { 797 AS_LOCK_EXIT(as, &as->a_lock); 798 return (DDI_FAILURE); 799 } 800 801 /* 802 * lock down the htables so the HAT can't steal them. Register the 803 * PTE MA's for each gref page with seg_mf so we can do user space 804 * gref mappings. 805 */ 806 for (i = 0; i < pgcnt; i++) { 807 hat_prepare_mapping(as->a_hat, uaddr, &pte_ma); 808 hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0, 809 PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK, 810 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 811 hat_release_mapping(as->a_hat, uaddr); 812 segmf_add_gref_pte(seg, uaddr, pte_ma); 813 uaddr += PAGESIZE; 814 } 815 816 state->bt_map.um_registered = B_TRUE; 817 818 AS_LOCK_EXIT(as, &as->a_lock); 819 820 return (DDI_SUCCESS); 821 } 822 823 824 /* 825 * xpvtap_segmf_unregister() 826 * as_callback routine 827 */ 828 /*ARGSUSED*/ 829 static void 830 xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event) 831 { 832 xpvtap_state_t *state; 833 caddr_t uaddr; 834 uint_t pgcnt; 835 int i; 836 837 838 state = (xpvtap_state_t *)arg; 839 if (!state->bt_map.um_registered) { 840 /* remove the callback (which is this routine) */ 841 (void) as_delete_callback(as, arg); 842 return; 843 } 844 845 pgcnt = btopr(state->bt_map.um_guest_size); 846 uaddr = state->bt_map.um_guest_pages; 847 848 /* unmap any outstanding req's grefs */ 849 xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state); 850 851 /* Unlock the gref pages */ 852 for (i = 0; i < pgcnt; i++) { 853 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 854 hat_prepare_mapping(as->a_hat, uaddr, NULL); 855 hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); 856 hat_release_mapping(as->a_hat, uaddr); 857 AS_LOCK_EXIT(as, &as->a_lock); 858 uaddr += PAGESIZE; 859 } 860 861 /* remove the callback (which is this routine) */ 862 (void) as_delete_callback(as, arg); 863 864 state->bt_map.um_registered = B_FALSE; 865 } 866 867 868 /* 869 * xpvtap_user_init() 870 */ 871 static int 872 xpvtap_user_init(xpvtap_state_t *state) 873 { 874 xpvtap_user_map_t *map; 875 int e; 876 877 878 map = &state->bt_map; 879 880 /* Setup the ring between the driver and user app */ 881 e = xpvtap_user_ring_init(state); 882 if (e != DDI_SUCCESS) { 883 return (DDI_FAILURE); 884 } 885 886 /* 887 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This 888 * is the same number of requests as the guest ring. Initialize the 889 * state we use to track request IDs to the user app. These IDs will 890 * also identify which group of gref pages correspond with the 891 * request. 892 */ 893 xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs); 894 895 /* 896 * allocate the space to store a copy of each outstanding requests. We 897 * will need to reference the ID and the number of segments when we 898 * get the response from the user app. 899 */ 900 map->um_outstanding_reqs = kmem_zalloc( 901 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE, 902 KM_SLEEP); 903 904 /* 905 * initialize the thread we use to process guest requests and user 906 * responses. 907 */ 908 e = xpvtap_user_thread_init(state); 909 if (e != DDI_SUCCESS) { 910 goto userinitfail_user_thread_init; 911 } 912 913 return (DDI_SUCCESS); 914 915 userinitfail_user_thread_init: 916 xpvtap_rs_fini(&map->um_rs); 917 kmem_free(map->um_outstanding_reqs, 918 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE); 919 xpvtap_user_ring_fini(state); 920 return (DDI_FAILURE); 921 } 922 923 924 /* 925 * xpvtap_user_ring_init() 926 */ 927 static int 928 xpvtap_user_ring_init(xpvtap_state_t *state) 929 { 930 xpvtap_user_ring_t *usring; 931 932 933 usring = &state->bt_user_ring; 934 935 /* alocate and initialize the page for the shared user ring */ 936 usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE, 937 DDI_UMEM_SLEEP, &usring->ur_cookie); 938 SHARED_RING_INIT(usring->ur_sring); 939 FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE); 940 usring->ur_prod_polled = 0; 941 942 return (DDI_SUCCESS); 943 } 944 945 946 /* 947 * xpvtap_user_thread_init() 948 */ 949 static int 950 xpvtap_user_thread_init(xpvtap_state_t *state) 951 { 952 xpvtap_user_thread_t *thread; 953 char taskqname[32]; 954 955 956 thread = &state->bt_thread; 957 958 mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL); 959 cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL); 960 cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL); 961 thread->ut_wake = B_FALSE; 962 thread->ut_exit = B_FALSE; 963 thread->ut_exit_done = B_TRUE; 964 965 /* create but don't start the user thread */ 966 (void) sprintf(taskqname, "xvptap_%d", state->bt_instance); 967 thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1, 968 TASKQ_DEFAULTPRI, 0); 969 if (thread->ut_taskq == NULL) { 970 goto userinitthrfail_taskq_create; 971 } 972 973 return (DDI_SUCCESS); 974 975 userinitthrfail_taskq_dispatch: 976 ddi_taskq_destroy(thread->ut_taskq); 977 userinitthrfail_taskq_create: 978 cv_destroy(&thread->ut_exit_done_cv); 979 cv_destroy(&thread->ut_wake_cv); 980 mutex_destroy(&thread->ut_mutex); 981 982 return (DDI_FAILURE); 983 } 984 985 986 /* 987 * xpvtap_user_thread_start() 988 */ 989 static void 990 xpvtap_user_thread_start(caddr_t arg) 991 { 992 xpvtap_user_thread_t *thread; 993 xpvtap_state_t *state; 994 int e; 995 996 997 state = (xpvtap_state_t *)arg; 998 thread = &state->bt_thread; 999 1000 /* start the user thread */ 1001 thread->ut_exit_done = B_FALSE; 1002 e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state, 1003 DDI_SLEEP); 1004 if (e != DDI_SUCCESS) { 1005 thread->ut_exit_done = B_TRUE; 1006 cmn_err(CE_WARN, "Unable to start user thread\n"); 1007 } 1008 } 1009 1010 1011 /* 1012 * xpvtap_user_thread_stop() 1013 */ 1014 static void 1015 xpvtap_user_thread_stop(xpvtap_state_t *state) 1016 { 1017 /* wake thread so it can exit */ 1018 mutex_enter(&state->bt_thread.ut_mutex); 1019 state->bt_thread.ut_wake = B_TRUE; 1020 state->bt_thread.ut_exit = B_TRUE; 1021 cv_signal(&state->bt_thread.ut_wake_cv); 1022 if (!state->bt_thread.ut_exit_done) { 1023 cv_wait(&state->bt_thread.ut_exit_done_cv, 1024 &state->bt_thread.ut_mutex); 1025 } 1026 mutex_exit(&state->bt_thread.ut_mutex); 1027 ASSERT(state->bt_thread.ut_exit_done); 1028 } 1029 1030 1031 /* 1032 * xpvtap_user_fini() 1033 */ 1034 static void 1035 xpvtap_user_fini(xpvtap_state_t *state) 1036 { 1037 xpvtap_user_map_t *map; 1038 1039 1040 map = &state->bt_map; 1041 1042 xpvtap_user_thread_fini(state); 1043 xpvtap_rs_fini(&map->um_rs); 1044 kmem_free(map->um_outstanding_reqs, 1045 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE); 1046 xpvtap_user_ring_fini(state); 1047 } 1048 1049 1050 /* 1051 * xpvtap_user_ring_fini() 1052 */ 1053 static void 1054 xpvtap_user_ring_fini(xpvtap_state_t *state) 1055 { 1056 ddi_umem_free(state->bt_user_ring.ur_cookie); 1057 } 1058 1059 1060 /* 1061 * xpvtap_user_thread_fini() 1062 */ 1063 static void 1064 xpvtap_user_thread_fini(xpvtap_state_t *state) 1065 { 1066 ddi_taskq_destroy(state->bt_thread.ut_taskq); 1067 cv_destroy(&state->bt_thread.ut_exit_done_cv); 1068 cv_destroy(&state->bt_thread.ut_wake_cv); 1069 mutex_destroy(&state->bt_thread.ut_mutex); 1070 } 1071 1072 1073 /* 1074 * xpvtap_user_thread() 1075 */ 1076 static void 1077 xpvtap_user_thread(void *arg) 1078 { 1079 xpvtap_user_thread_t *thread; 1080 blkif_response_t resp; 1081 xpvtap_state_t *state; 1082 blkif_request_t req; 1083 boolean_t b; 1084 uint_t uid; 1085 int e; 1086 1087 1088 state = (xpvtap_state_t *)arg; 1089 thread = &state->bt_thread; 1090 1091 xpvtap_thread_start: 1092 /* See if we are supposed to exit */ 1093 mutex_enter(&thread->ut_mutex); 1094 if (thread->ut_exit) { 1095 thread->ut_exit_done = B_TRUE; 1096 cv_signal(&state->bt_thread.ut_exit_done_cv); 1097 mutex_exit(&thread->ut_mutex); 1098 return; 1099 } 1100 1101 /* 1102 * if we aren't supposed to be awake, wait until someone wakes us. 1103 * when we wake up, check for a kill or someone telling us to exit. 1104 */ 1105 if (!thread->ut_wake) { 1106 e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex); 1107 if ((e == 0) || (thread->ut_exit)) { 1108 thread->ut_exit = B_TRUE; 1109 mutex_exit(&thread->ut_mutex); 1110 goto xpvtap_thread_start; 1111 } 1112 } 1113 1114 /* if someone didn't wake us, go back to the start of the thread */ 1115 if (!thread->ut_wake) { 1116 mutex_exit(&thread->ut_mutex); 1117 goto xpvtap_thread_start; 1118 } 1119 1120 /* we are awake */ 1121 thread->ut_wake = B_FALSE; 1122 mutex_exit(&thread->ut_mutex); 1123 1124 /* process requests from the guest */ 1125 do { 1126 /* 1127 * check for requests from the guest. if we don't have any, 1128 * break out of the loop. 1129 */ 1130 e = blk_ring_request_get(state->bt_guest_ring, &req); 1131 if (e == B_FALSE) { 1132 break; 1133 } 1134 1135 /* we got a request, map the grefs into the user app's VA */ 1136 e = xpvtap_user_request_map(state, &req, &uid); 1137 if (e != DDI_SUCCESS) { 1138 /* 1139 * If we couldn't map the request (e.g. user app hasn't 1140 * opened the device yet), requeue it and try again 1141 * later 1142 */ 1143 blk_ring_request_requeue(state->bt_guest_ring); 1144 break; 1145 } 1146 1147 /* push the request to the user app */ 1148 e = xpvtap_user_request_push(state, &req, uid); 1149 if (e != DDI_SUCCESS) { 1150 resp.id = req.id; 1151 resp.operation = req.operation; 1152 resp.status = BLKIF_RSP_ERROR; 1153 blk_ring_response_put(state->bt_guest_ring, &resp); 1154 } 1155 } while (!thread->ut_exit); 1156 1157 /* process reponses from the user app */ 1158 do { 1159 /* 1160 * check for responses from the user app. if we don't have any, 1161 * break out of the loop. 1162 */ 1163 b = xpvtap_user_response_get(state, &resp, &uid); 1164 if (b != B_TRUE) { 1165 break; 1166 } 1167 1168 /* 1169 * if we got a response, unmap the grefs from the matching 1170 * request. 1171 */ 1172 xpvtap_user_request_unmap(state, uid); 1173 1174 /* push the response to the guest */ 1175 blk_ring_response_put(state->bt_guest_ring, &resp); 1176 } while (!thread->ut_exit); 1177 1178 goto xpvtap_thread_start; 1179 } 1180 1181 1182 /* 1183 * xpvtap_user_request_map() 1184 */ 1185 static int 1186 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req, 1187 uint_t *uid) 1188 { 1189 grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 1190 struct seg *seg; 1191 struct as *as; 1192 domid_t domid; 1193 caddr_t uaddr; 1194 uint_t flags; 1195 int i; 1196 int e; 1197 1198 1199 domid = xvdi_get_oeid(state->bt_dip); 1200 1201 as = state->bt_map.um_as; 1202 if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) { 1203 return (DDI_FAILURE); 1204 } 1205 1206 /* has to happen after segmap returns */ 1207 if (!state->bt_map.um_registered) { 1208 /* register the pte's with segmf */ 1209 e = xpvtap_segmf_register(state); 1210 if (e != DDI_SUCCESS) { 1211 return (DDI_FAILURE); 1212 } 1213 } 1214 1215 /* alloc an ID for the user ring */ 1216 e = xpvtap_rs_alloc(state->bt_map.um_rs, uid); 1217 if (e != DDI_SUCCESS) { 1218 return (DDI_FAILURE); 1219 } 1220 1221 /* if we don't have any segments to map, we're done */ 1222 if ((req->operation == BLKIF_OP_WRITE_BARRIER) || 1223 (req->operation == BLKIF_OP_FLUSH_DISKCACHE) || 1224 (req->nr_segments == 0)) { 1225 return (DDI_SUCCESS); 1226 } 1227 1228 /* get the apps gref address */ 1229 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid); 1230 1231 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1232 seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 1233 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) > 1234 (seg->s_base + seg->s_size))) { 1235 AS_LOCK_EXIT(as, &as->a_lock); 1236 return (DDI_FAILURE); 1237 } 1238 1239 /* if we are reading from disk, we are writing into memory */ 1240 flags = 0; 1241 if (req->operation == BLKIF_OP_READ) { 1242 flags |= SEGMF_GREF_WR; 1243 } 1244 1245 /* Load the grefs into seg_mf */ 1246 for (i = 0; i < req->nr_segments; i++) { 1247 gref[i] = req->seg[i].gref; 1248 } 1249 (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments, 1250 domid); 1251 1252 AS_LOCK_EXIT(as, &as->a_lock); 1253 1254 return (DDI_SUCCESS); 1255 } 1256 1257 1258 /* 1259 * xpvtap_user_request_push() 1260 */ 1261 static int 1262 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req, 1263 uint_t uid) 1264 { 1265 blkif_request_t *outstanding_req; 1266 blkif_front_ring_t *uring; 1267 blkif_request_t *target; 1268 xpvtap_user_map_t *map; 1269 1270 1271 uring = &state->bt_user_ring.ur_ring; 1272 map = &state->bt_map; 1273 1274 target = RING_GET_REQUEST(uring, uring->req_prod_pvt); 1275 1276 /* 1277 * Save request from the frontend. used for ID mapping and unmap 1278 * on response/cleanup 1279 */ 1280 outstanding_req = &map->um_outstanding_reqs[uid]; 1281 bcopy(req, outstanding_req, sizeof (*outstanding_req)); 1282 1283 /* put the request on the user ring */ 1284 bcopy(req, target, sizeof (*req)); 1285 target->id = (uint64_t)uid; 1286 uring->req_prod_pvt++; 1287 1288 pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM); 1289 1290 return (DDI_SUCCESS); 1291 } 1292 1293 1294 static void 1295 xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid) 1296 { 1297 blkif_request_t *req; 1298 struct seg *seg; 1299 struct as *as; 1300 caddr_t uaddr; 1301 int e; 1302 1303 1304 as = state->bt_map.um_as; 1305 if (as == NULL) { 1306 return; 1307 } 1308 1309 /* get a copy of the original request */ 1310 req = &state->bt_map.um_outstanding_reqs[uid]; 1311 1312 /* unmap the grefs for this request */ 1313 if ((req->operation != BLKIF_OP_WRITE_BARRIER) && 1314 (req->operation != BLKIF_OP_FLUSH_DISKCACHE) && 1315 (req->nr_segments != 0)) { 1316 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid); 1317 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1318 seg = as_findseg(as, state->bt_map.um_guest_pages, 0); 1319 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) > 1320 (seg->s_base + seg->s_size))) { 1321 AS_LOCK_EXIT(as, &as->a_lock); 1322 xpvtap_rs_free(state->bt_map.um_rs, uid); 1323 return; 1324 } 1325 1326 e = segmf_release_grefs(seg, uaddr, req->nr_segments); 1327 if (e != 0) { 1328 cmn_err(CE_WARN, "unable to release grefs"); 1329 } 1330 1331 AS_LOCK_EXIT(as, &as->a_lock); 1332 } 1333 1334 /* free up the user ring id */ 1335 xpvtap_rs_free(state->bt_map.um_rs, uid); 1336 } 1337 1338 1339 static int 1340 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp, 1341 uint_t *uid) 1342 { 1343 blkif_front_ring_t *uring; 1344 blkif_response_t *target; 1345 1346 1347 uring = &state->bt_user_ring.ur_ring; 1348 1349 if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) { 1350 return (B_FALSE); 1351 } 1352 1353 target = NULL; 1354 target = RING_GET_RESPONSE(uring, uring->rsp_cons); 1355 if (target == NULL) { 1356 return (B_FALSE); 1357 } 1358 1359 /* copy out the user app response */ 1360 bcopy(target, resp, sizeof (*resp)); 1361 uring->rsp_cons++; 1362 1363 /* restore the quests id from the original request */ 1364 *uid = (uint_t)resp->id; 1365 resp->id = state->bt_map.um_outstanding_reqs[*uid].id; 1366 1367 return (B_TRUE); 1368 } 1369 1370 1371 /* 1372 * xpvtap_user_app_stop() 1373 */ 1374 static void xpvtap_user_app_stop(caddr_t arg) 1375 { 1376 xpvtap_state_t *state; 1377 clock_t timeout; 1378 clock_t rc; 1379 1380 1381 state = (xpvtap_state_t *)arg; 1382 1383 /* 1384 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious 1385 * problem, we just won't auto-detach the driver. 1386 */ 1387 mutex_enter(&state->bt_open.bo_mutex); 1388 if (state->bt_open.bo_opened) { 1389 timeout = ddi_get_lbolt() + drv_usectohz(10000000); 1390 rc = cv_timedwait(&state->bt_open.bo_exit_cv, 1391 &state->bt_open.bo_mutex, timeout); 1392 if (rc <= 0) { 1393 cmn_err(CE_NOTE, "!user process still has driver open, " 1394 "deferring detach\n"); 1395 } 1396 } 1397 mutex_exit(&state->bt_open.bo_mutex); 1398 } 1399 1400 1401 /* 1402 * xpvtap_rs_init() 1403 * Initialize the resource structure. init() returns a handle to be used 1404 * for the rest of the resource functions. This code is written assuming 1405 * that min_val will be close to 0. Therefore, we will allocate the free 1406 * buffer only taking max_val into account. 1407 */ 1408 static void 1409 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle) 1410 { 1411 xpvtap_rs_t *rstruct; 1412 uint_t array_size; 1413 uint_t index; 1414 1415 1416 ASSERT(handle != NULL); 1417 ASSERT(min_val < max_val); 1418 1419 /* alloc space for resource structure */ 1420 rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP); 1421 1422 /* 1423 * Test to see if the max value is 64-bit aligned. If so, we don't need 1424 * to allocate an extra 64-bit word. alloc space for free buffer 1425 * (8 bytes per uint64_t). 1426 */ 1427 if ((max_val & 0x3F) == 0) { 1428 rstruct->rs_free_size = (max_val >> 6) * 8; 1429 } else { 1430 rstruct->rs_free_size = ((max_val >> 6) + 1) * 8; 1431 } 1432 rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP); 1433 1434 /* Initialize resource structure */ 1435 rstruct->rs_min = min_val; 1436 rstruct->rs_last = min_val; 1437 rstruct->rs_max = max_val; 1438 mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL); 1439 rstruct->rs_flushing = B_FALSE; 1440 1441 /* Mark all resources as free */ 1442 array_size = rstruct->rs_free_size >> 3; 1443 for (index = 0; index < array_size; index++) { 1444 rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF; 1445 } 1446 1447 /* setup handle which is returned from this function */ 1448 *handle = rstruct; 1449 } 1450 1451 1452 /* 1453 * xpvtap_rs_fini() 1454 * Frees up the space allocated in init(). Notice that a pointer to the 1455 * handle is used for the parameter. fini() will set the handle to NULL 1456 * before returning. 1457 */ 1458 static void 1459 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle) 1460 { 1461 xpvtap_rs_t *rstruct; 1462 1463 1464 ASSERT(handle != NULL); 1465 1466 rstruct = (xpvtap_rs_t *)*handle; 1467 1468 mutex_destroy(&rstruct->rs_mutex); 1469 kmem_free(rstruct->rs_free, rstruct->rs_free_size); 1470 kmem_free(rstruct, sizeof (xpvtap_rs_t)); 1471 1472 /* set handle to null. This helps catch bugs. */ 1473 *handle = NULL; 1474 } 1475 1476 1477 /* 1478 * xpvtap_rs_alloc() 1479 * alloc a resource. If alloc fails, we are out of resources. 1480 */ 1481 static int 1482 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource) 1483 { 1484 xpvtap_rs_t *rstruct; 1485 uint_t array_idx; 1486 uint64_t free; 1487 uint_t index; 1488 uint_t last; 1489 uint_t min; 1490 uint_t max; 1491 1492 1493 ASSERT(handle != NULL); 1494 ASSERT(resource != NULL); 1495 1496 rstruct = (xpvtap_rs_t *)handle; 1497 1498 mutex_enter(&rstruct->rs_mutex); 1499 min = rstruct->rs_min; 1500 max = rstruct->rs_max; 1501 1502 /* 1503 * Find a free resource. This will return out of the loop once it finds 1504 * a free resource. There are a total of 'max'-'min'+1 resources. 1505 * Performs a round robin allocation. 1506 */ 1507 for (index = min; index <= max; index++) { 1508 1509 array_idx = rstruct->rs_last >> 6; 1510 free = rstruct->rs_free[array_idx]; 1511 last = rstruct->rs_last & 0x3F; 1512 1513 /* if the next resource to check is free */ 1514 if ((free & ((uint64_t)1 << last)) != 0) { 1515 /* we are using this resource */ 1516 *resource = rstruct->rs_last; 1517 1518 /* take it out of the free list */ 1519 rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last); 1520 1521 /* 1522 * increment the last count so we start checking the 1523 * next resource on the next alloc(). Note the rollover 1524 * at 'max'+1. 1525 */ 1526 rstruct->rs_last++; 1527 if (rstruct->rs_last > max) { 1528 rstruct->rs_last = rstruct->rs_min; 1529 } 1530 1531 /* unlock the resource structure */ 1532 mutex_exit(&rstruct->rs_mutex); 1533 1534 return (DDI_SUCCESS); 1535 } 1536 1537 /* 1538 * This resource is not free, lets go to the next one. Note the 1539 * rollover at 'max'. 1540 */ 1541 rstruct->rs_last++; 1542 if (rstruct->rs_last > max) { 1543 rstruct->rs_last = rstruct->rs_min; 1544 } 1545 } 1546 1547 mutex_exit(&rstruct->rs_mutex); 1548 1549 return (DDI_FAILURE); 1550 } 1551 1552 1553 /* 1554 * xpvtap_rs_free() 1555 * Free the previously alloc'd resource. Once a resource has been free'd, 1556 * it can be used again when alloc is called. 1557 */ 1558 static void 1559 xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource) 1560 { 1561 xpvtap_rs_t *rstruct; 1562 uint_t array_idx; 1563 uint_t offset; 1564 1565 1566 ASSERT(handle != NULL); 1567 1568 rstruct = (xpvtap_rs_t *)handle; 1569 ASSERT(resource >= rstruct->rs_min); 1570 ASSERT(resource <= rstruct->rs_max); 1571 1572 if (!rstruct->rs_flushing) { 1573 mutex_enter(&rstruct->rs_mutex); 1574 } 1575 1576 /* Put the resource back in the free list */ 1577 array_idx = resource >> 6; 1578 offset = resource & 0x3F; 1579 rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset); 1580 1581 if (!rstruct->rs_flushing) { 1582 mutex_exit(&rstruct->rs_mutex); 1583 } 1584 } 1585 1586 1587 /* 1588 * xpvtap_rs_flush() 1589 */ 1590 static void 1591 xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback, 1592 void *arg) 1593 { 1594 xpvtap_rs_t *rstruct; 1595 uint_t array_idx; 1596 uint64_t free; 1597 uint_t index; 1598 uint_t last; 1599 uint_t min; 1600 uint_t max; 1601 1602 1603 ASSERT(handle != NULL); 1604 1605 rstruct = (xpvtap_rs_t *)handle; 1606 1607 mutex_enter(&rstruct->rs_mutex); 1608 min = rstruct->rs_min; 1609 max = rstruct->rs_max; 1610 1611 rstruct->rs_flushing = B_TRUE; 1612 1613 /* 1614 * for all resources not free, call the callback routine to clean it 1615 * up. 1616 */ 1617 for (index = min; index <= max; index++) { 1618 1619 array_idx = rstruct->rs_last >> 6; 1620 free = rstruct->rs_free[array_idx]; 1621 last = rstruct->rs_last & 0x3F; 1622 1623 /* if the next resource to check is not free */ 1624 if ((free & ((uint64_t)1 << last)) == 0) { 1625 /* call the callback to cleanup */ 1626 (*callback)(arg, rstruct->rs_last); 1627 1628 /* put it back in the free list */ 1629 rstruct->rs_free[array_idx] |= ((uint64_t)1 << last); 1630 } 1631 1632 /* go to the next one. Note the rollover at 'max' */ 1633 rstruct->rs_last++; 1634 if (rstruct->rs_last > max) { 1635 rstruct->rs_last = rstruct->rs_min; 1636 } 1637 } 1638 1639 mutex_exit(&rstruct->rs_mutex); 1640 } 1641