1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * xdf.c - Xen Virtual Block Device Driver 29 * TODO: 30 * - support alternate block size (currently only DEV_BSIZE supported) 31 * - revalidate geometry for removable devices 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include "xdf.h" 37 38 #define FLUSH_DISKCACHE 0x1 39 #define WRITE_BARRIER 0x2 40 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 41 #define USE_WRITE_BARRIER(vdp) \ 42 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 43 #define USE_FLUSH_DISKCACHE(vdp) \ 44 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 45 #define IS_WRITE_BARRIER(vdp, bp) \ 46 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 47 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 48 #define IS_FLUSH_DISKCACHE(bp) \ 49 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 50 51 static void *vbd_ss; 52 static kmem_cache_t *xdf_vreq_cache; 53 static kmem_cache_t *xdf_gs_cache; 54 static int xdf_maxphys = XB_MAXPHYS; 55 int xdfdebug = 0; 56 extern int do_polled_io; 57 diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 58 int xdf_barrier_flush_disable = 0; 59 60 /* 61 * dev_ops and cb_ops entrypoints 62 */ 63 static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 64 static int xdf_attach(dev_info_t *, ddi_attach_cmd_t); 65 static int xdf_detach(dev_info_t *, ddi_detach_cmd_t); 66 static int xdf_reset(dev_info_t *, ddi_reset_cmd_t); 67 static int xdf_open(dev_t *, int, int, cred_t *); 68 static int xdf_close(dev_t, int, int, struct cred *); 69 static int xdf_strategy(struct buf *); 70 static int xdf_read(dev_t, struct uio *, cred_t *); 71 static int xdf_aread(dev_t, struct aio_req *, cred_t *); 72 static int xdf_write(dev_t, struct uio *, cred_t *); 73 static int xdf_awrite(dev_t, struct aio_req *, cred_t *); 74 static int xdf_dump(dev_t, caddr_t, daddr_t, int); 75 static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 76 static uint_t xdf_intr(caddr_t); 77 static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *, 78 caddr_t, int *); 79 80 /* 81 * misc private functions 82 */ 83 static int xdf_suspend(dev_info_t *); 84 static int xdf_resume(dev_info_t *); 85 static int xdf_start_connect(xdf_t *); 86 static int xdf_start_disconnect(xdf_t *); 87 static int xdf_post_connect(xdf_t *); 88 static void xdf_post_disconnect(xdf_t *); 89 static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); 90 static void xdf_iostart(xdf_t *); 91 static void xdf_iofini(xdf_t *, uint64_t, int); 92 static int xdf_prepare_rreq(xdf_t *, struct buf *, 
blkif_request_t *); 93 static int xdf_drain_io(xdf_t *); 94 static boolean_t xdf_isopen(xdf_t *, int); 95 static int xdf_check_state_transition(xdf_t *, XenbusState); 96 static int xdf_connect(xdf_t *, boolean_t); 97 static int xdf_dmacallback(caddr_t); 98 static void xdf_timeout_handler(void *); 99 static uint_t xdf_iorestart(caddr_t); 100 static v_req_t *vreq_get(xdf_t *, buf_t *); 101 static void vreq_free(xdf_t *, v_req_t *); 102 static int vreq_setup(xdf_t *, v_req_t *); 103 static ge_slot_t *gs_get(xdf_t *, int); 104 static void gs_free(xdf_t *, ge_slot_t *); 105 static grant_ref_t gs_grant(ge_slot_t *, mfn_t); 106 static void unexpectedie(xdf_t *); 107 static void xdfmin(struct buf *); 108 109 static struct cb_ops xdf_cbops = { 110 xdf_open, 111 xdf_close, 112 xdf_strategy, 113 nodev, 114 xdf_dump, 115 xdf_read, 116 xdf_write, 117 xdf_ioctl, 118 nodev, 119 nodev, 120 nodev, 121 nochpoll, 122 xdf_prop_op, 123 NULL, 124 D_MP | D_NEW | D_64BIT, 125 CB_REV, 126 xdf_aread, 127 xdf_awrite 128 }; 129 130 struct dev_ops xdf_devops = { 131 DEVO_REV, /* devo_rev */ 132 0, /* devo_refcnt */ 133 xdf_getinfo, /* devo_getinfo */ 134 nulldev, /* devo_identify */ 135 nulldev, /* devo_probe */ 136 xdf_attach, /* devo_attach */ 137 xdf_detach, /* devo_detach */ 138 xdf_reset, /* devo_reset */ 139 &xdf_cbops, /* devo_cb_ops */ 140 (struct bus_ops *)NULL /* devo_bus_ops */ 141 }; 142 143 static struct modldrv modldrv = { 144 &mod_driverops, /* Type of module. This one is a driver */ 145 "virtual block driver %I%", /* short description */ 146 &xdf_devops /* driver specific ops */ 147 }; 148 149 static struct modlinkage xdf_modlinkage = { 150 MODREV_1, (void *)&modldrv, NULL 151 }; 152 153 /* 154 * I/O buffer DMA attributes 155 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 156 */ 157 static ddi_dma_attr_t xb_dma_attr = { 158 DMA_ATTR_V0, 159 (uint64_t)0, /* lowest address */ 160 (uint64_t)0xffffffffffffffff, /* highest usable address */ 161 (uint64_t)0xffffff, /* DMA counter limit max */ 162 (uint64_t)XB_BSIZE, /* alignment in bytes */ 163 XB_BSIZE - 1, /* bitmap of burst sizes */ 164 XB_BSIZE, /* min transfer */ 165 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 166 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 167 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 168 XB_BSIZE, /* granularity */ 169 0, /* flags (reserved) */ 170 }; 171 172 static ddi_device_acc_attr_t xc_acc_attr = { 173 DDI_DEVICE_ATTR_V0, 174 DDI_NEVERSWAP_ACC, 175 DDI_STRICTORDER_ACC 176 }; 177 178 /* callbacks from common label */ 179 180 static int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 181 void *); 182 static int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 183 184 static cmlb_tg_ops_t xdf_lb_ops = { 185 TG_DK_OPS_VERSION_1, 186 xdf_lb_rdwr, 187 xdf_lb_getinfo 188 }; 189 190 int 191 _init(void) 192 { 193 int rc; 194 195 if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) == 0) { 196 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 197 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 198 ASSERT(xdf_vreq_cache != NULL); 199 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 200 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 201 ASSERT(xdf_gs_cache != NULL); 202 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 203 kmem_cache_destroy(xdf_vreq_cache); 204 kmem_cache_destroy(xdf_gs_cache); 205 ddi_soft_state_fini(&vbd_ss); 206 } 207 } 208 209 return (rc); 210 } 211 212 int 213 _fini(void) 214 { 215 int err; 216 217 if ((err =
mod_remove(&xdf_modlinkage)) != 0) 218 return (err); 219 220 kmem_cache_destroy(xdf_vreq_cache); 221 kmem_cache_destroy(xdf_gs_cache); 222 ddi_soft_state_fini(&vbd_ss); 223 224 return (0); 225 } 226 227 int 228 _info(struct modinfo *modinfop) 229 { 230 return (mod_info(&xdf_modlinkage, modinfop)); 231 } 232 233 /*ARGSUSED*/ 234 static int 235 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 236 { 237 int instance; 238 xdf_t *vbdp; 239 240 instance = XDF_INST(getminor((dev_t)arg)); 241 242 switch (cmd) { 243 case DDI_INFO_DEVT2DEVINFO: 244 if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { 245 *rp = NULL; 246 return (DDI_FAILURE); 247 } 248 *rp = vbdp->xdf_dip; 249 return (DDI_SUCCESS); 250 251 case DDI_INFO_DEVT2INSTANCE: 252 *rp = (void *)(uintptr_t)instance; 253 return (DDI_SUCCESS); 254 255 default: 256 return (DDI_FAILURE); 257 } 258 } 259 260 static int 261 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 262 char *name, caddr_t valuep, int *lengthp) 263 { 264 int instance = ddi_get_instance(dip); 265 xdf_t *vdp; 266 diskaddr_t p_blkcnt; 267 268 /* 269 * xdf dynamic properties are device specific and size oriented. 270 * Requests issued under conditions where size is valid are passed 271 * to ddi_prop_op_nblocks with the size information, otherwise the 272 * request is passed to ddi_prop_op. 273 */ 274 vdp = ddi_get_soft_state(vbd_ss, instance); 275 276 if ((dev == DDI_DEV_T_ANY) || (vdp == NULL)) 277 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 278 name, valuep, lengthp)); 279 280 /* do cv_wait until connected or failed */ 281 mutex_enter(&vdp->xdf_dev_lk); 282 if (xdf_connect(vdp, B_TRUE) != XD_READY) { 283 mutex_exit(&vdp->xdf_dev_lk); 284 goto out; 285 } 286 mutex_exit(&vdp->xdf_dev_lk); 287 288 if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt, 289 NULL, NULL, NULL, NULL) == 0) 290 return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 291 name, valuep, lengthp, (uint64_t)p_blkcnt)); 292 293 out: 294 return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep, 295 lengthp)); 296 } 297 298 static int 299 xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 300 { 301 xdf_t *vdp; 302 ddi_iblock_cookie_t ibc; 303 ddi_iblock_cookie_t softibc; 304 int instance; 305 306 xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, 307 "xdfdebug", 0); 308 309 switch (cmd) { 310 case DDI_ATTACH: 311 break; 312 313 case DDI_RESUME: 314 return (xdf_resume(devi)); 315 316 default: 317 return (DDI_FAILURE); 318 } 319 320 instance = ddi_get_instance(devi); 321 if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) 322 return (DDI_FAILURE); 323 324 DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); 325 vdp = ddi_get_soft_state(vbd_ss, instance); 326 vdp->xdf_dip = devi; 327 if (ddi_get_iblock_cookie(devi, 0, &ibc) != DDI_SUCCESS) { 328 cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", 329 ddi_get_name_addr(devi)); 330 goto errout1; 331 } 332 333 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 334 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 335 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 336 ddi_set_driver_private(devi, vdp); 337 338 if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) 339 != DDI_SUCCESS) { 340 cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", 341 ddi_get_name_addr(devi)); 342 goto errout2; 343 } 344 if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 345 &softibc, NULL, xdf_iorestart, 
(caddr_t)vdp) != DDI_SUCCESS) { 346 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 347 ddi_get_name_addr(devi)); 348 goto errout2; 349 } 350 351 /* 352 * create kstat for iostat(1M) 353 */ 354 if ((vdp->xdf_xdev_iostat = kstat_create("xdf", instance, NULL, "disk", 355 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) { 356 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 357 kstat_install(vdp->xdf_xdev_iostat); 358 } else { 359 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 360 ddi_get_name_addr(devi)); 361 goto errout3; 362 } 363 364 /* 365 * driver handles kernel-issued IOCTLs 366 */ 367 if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, 368 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 369 cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", 370 ddi_get_name_addr(devi)); 371 goto errout4; 372 } 373 374 /* 375 * create default device minor nodes: non-removable disk 376 * we will adjust minor nodes after we are connected w/ backend 377 */ 378 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 379 if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, DDI_NT_BLOCK, 380 CMLB_FAKE_LABEL_ONE_PARTITION, vdp->xdf_vd_lbl, NULL) != 0) { 381 cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", 382 ddi_get_name_addr(devi)); 383 goto errout5; 384 } 385 386 /* 387 * We ship with cache-enabled disks 388 */ 389 vdp->xdf_wce = 1; 390 391 mutex_enter(&vdp->xdf_cb_lk); 392 393 /* Watch backend XenbusState change */ 394 if (xvdi_add_event_handler(devi, XS_OE_STATE, 395 xdf_oe_change) != DDI_SUCCESS) { 396 mutex_exit(&vdp->xdf_cb_lk); 397 goto errout6; 398 } 399 400 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 401 cmn_err(CE_WARN, "xdf@%s: start connection failed", 402 ddi_get_name_addr(devi)); 403 (void) xdf_start_disconnect(vdp); 404 mutex_exit(&vdp->xdf_cb_lk); 405 goto errout7; 406 } 407 408 mutex_exit(&vdp->xdf_cb_lk); 409 410 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 411 offsetof(v_req_t, v_link)); 412 list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), 413 offsetof(ge_slot_t, link)); 414 415 ddi_report_dev(devi); 416 DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); 417 418 return (DDI_SUCCESS); 419 420 errout7: 421 xvdi_remove_event_handler(devi, XS_OE_STATE); 422 errout6: 423 cmlb_detach(vdp->xdf_vd_lbl, NULL); 424 errout5: 425 cmlb_free_handle(&vdp->xdf_vd_lbl); 426 ddi_prop_remove_all(devi); 427 errout4: 428 kstat_delete(vdp->xdf_xdev_iostat); 429 errout3: 430 ddi_remove_softintr(vdp->xdf_softintr_id); 431 errout2: 432 ddi_set_driver_private(devi, NULL); 433 cv_destroy(&vdp->xdf_dev_cv); 434 mutex_destroy(&vdp->xdf_cb_lk); 435 mutex_destroy(&vdp->xdf_dev_lk); 436 errout1: 437 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); 438 ddi_soft_state_free(vbd_ss, instance); 439 return (DDI_FAILURE); 440 } 441 442 static int 443 xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 444 { 445 xdf_t *vdp; 446 int instance; 447 448 switch (cmd) { 449 450 case DDI_PM_SUSPEND: 451 break; 452 453 case DDI_SUSPEND: 454 return (xdf_suspend(devi)); 455 456 case DDI_DETACH: 457 break; 458 459 default: 460 return (DDI_FAILURE); 461 } 462 463 instance = ddi_get_instance(devi); 464 DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); 465 vdp = ddi_get_soft_state(vbd_ss, instance); 466 467 if (vdp == NULL) 468 return (DDI_FAILURE); 469 470 mutex_enter(&vdp->xdf_dev_lk); 471 if (xdf_isopen(vdp, -1)) { 472 mutex_exit(&vdp->xdf_dev_lk); 473 return (DDI_FAILURE); 474 } 475 476 if (vdp->xdf_status != XD_CLOSED) { 477 mutex_exit(&vdp->xdf_dev_lk); 478 return (DDI_FAILURE); 479 } 
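/* no open partitions and the backend connection is closed; safe to tear down */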
480 481 ASSERT(!ISDMACBON(vdp)); 482 mutex_exit(&vdp->xdf_dev_lk); 483 484 if (vdp->xdf_timeout_id != 0) 485 (void) untimeout(vdp->xdf_timeout_id); 486 487 xvdi_remove_event_handler(devi, XS_OE_STATE); 488 489 /* we'll support backend running in domU later */ 490 #ifdef DOMU_BACKEND 491 (void) xvdi_post_event(devi, XEN_HP_REMOVE); 492 #endif 493 494 list_destroy(&vdp->xdf_vreq_act); 495 list_destroy(&vdp->xdf_gs_act); 496 ddi_prop_remove_all(devi); 497 kstat_delete(vdp->xdf_xdev_iostat); 498 ddi_remove_softintr(vdp->xdf_softintr_id); 499 ddi_set_driver_private(devi, NULL); 500 cv_destroy(&vdp->xdf_dev_cv); 501 mutex_destroy(&vdp->xdf_cb_lk); 502 mutex_destroy(&vdp->xdf_dev_lk); 503 if (vdp->xdf_cache_flush_block != NULL) 504 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 505 ddi_soft_state_free(vbd_ss, instance); 506 return (DDI_SUCCESS); 507 } 508 509 static int 510 xdf_suspend(dev_info_t *devi) 511 { 512 xdf_t *vdp; 513 int instance; 514 enum xdf_state st; 515 516 instance = ddi_get_instance(devi); 517 518 if (xdfdebug & SUSRES_DBG) 519 xen_printf("xdf_suspend: xdf#%d\n", instance); 520 521 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 522 return (DDI_FAILURE); 523 524 xvdi_suspend(devi); 525 526 mutex_enter(&vdp->xdf_cb_lk); 527 mutex_enter(&vdp->xdf_dev_lk); 528 st = vdp->xdf_status; 529 /* change status to stop further I/O requests */ 530 if (st == XD_READY) 531 vdp->xdf_status = XD_SUSPEND; 532 mutex_exit(&vdp->xdf_dev_lk); 533 mutex_exit(&vdp->xdf_cb_lk); 534 535 /* make sure no more I/O responses left in the ring buffer */ 536 if ((st == XD_INIT) || (st == XD_READY)) { 537 (void) ddi_remove_intr(devi, 0, NULL); 538 (void) xdf_drain_io(vdp); 539 /* 540 * no need to teardown the ring buffer here 541 * it will be simply re-init'ed during resume when 542 * we call xvdi_alloc_ring 543 */ 544 } 545 546 if (xdfdebug & SUSRES_DBG) 547 xen_printf("xdf_suspend: SUCCESS\n"); 548 549 return (DDI_SUCCESS); 550 } 551 552 /*ARGSUSED*/ 553 static int 554 xdf_resume(dev_info_t *devi) 555 { 556 xdf_t *vdp; 557 int instance; 558 559 instance = ddi_get_instance(devi); 560 if (xdfdebug & SUSRES_DBG) 561 xen_printf("xdf_resume: xdf%d\n", instance); 562 563 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 564 return (DDI_FAILURE); 565 566 mutex_enter(&vdp->xdf_cb_lk); 567 568 if (xvdi_resume(devi) != DDI_SUCCESS) { 569 mutex_exit(&vdp->xdf_cb_lk); 570 return (DDI_FAILURE); 571 } 572 573 mutex_enter(&vdp->xdf_dev_lk); 574 ASSERT(vdp->xdf_status != XD_READY); 575 vdp->xdf_status = XD_UNKNOWN; 576 mutex_exit(&vdp->xdf_dev_lk); 577 578 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 579 mutex_exit(&vdp->xdf_cb_lk); 580 return (DDI_FAILURE); 581 } 582 583 mutex_exit(&vdp->xdf_cb_lk); 584 585 if (xdfdebug & SUSRES_DBG) 586 xen_printf("xdf_resume: done\n"); 587 return (DDI_SUCCESS); 588 } 589 590 /*ARGSUSED*/ 591 static int 592 xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 593 { 594 xdf_t *vdp; 595 int instance; 596 597 instance = ddi_get_instance(devi); 598 DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); 599 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 600 return (DDI_FAILURE); 601 602 /* 603 * wait for any outstanding I/O to complete 604 */ 605 (void) xdf_drain_io(vdp); 606 607 DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); 608 return (DDI_SUCCESS); 609 } 610 611 static int 612 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 613 { 614 minor_t minor; 615 xdf_t *vdp; 616 int part; 617 ulong_t parbit; 618 diskaddr_t p_blkct = 0; 619 boolean_t firstopen; 620 
boolean_t nodelay; 621 622 nodelay = (flag & (FNDELAY | FNONBLOCK)); 623 minor = getminor(*devp); 624 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 625 return (ENXIO); 626 627 DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); 628 629 /* do cv_wait until connected or failed */ 630 mutex_enter(&vdp->xdf_dev_lk); 631 if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { 632 mutex_exit(&vdp->xdf_dev_lk); 633 return (ENXIO); 634 } 635 636 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 637 mutex_exit(&vdp->xdf_dev_lk); 638 return (EROFS); 639 } 640 641 part = XDF_PART(minor); 642 parbit = 1 << part; 643 if (vdp->xdf_vd_exclopen & parbit) { 644 mutex_exit(&vdp->xdf_dev_lk); 645 return (EBUSY); 646 } 647 648 /* are we the first one to open this node? */ 649 firstopen = !xdf_isopen(vdp, -1); 650 651 if ((flag & FEXCL) && !firstopen) { 652 mutex_exit(&vdp->xdf_dev_lk); 653 return (EBUSY); 654 } 655 656 if (otyp == OTYP_LYR) 657 vdp->xdf_vd_lyropen[part]++; 658 659 vdp->xdf_vd_open[otyp] |= parbit; 660 661 if (flag & FEXCL) 662 vdp->xdf_vd_exclopen |= parbit; 663 664 mutex_exit(&vdp->xdf_dev_lk); 665 666 /* force a re-validation */ 667 if (firstopen) 668 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 669 670 /* 671 * check size 672 * ignore CD/DVD which contains a zero-sized s0 673 */ 674 if (!nodelay && !XD_IS_CD(vdp) && 675 ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 676 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { 677 (void) xdf_close(*devp, flag, otyp, credp); 678 return (ENXIO); 679 } 680 681 return (0); 682 } 683 684 /*ARGSUSED*/ 685 static int 686 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 687 { 688 minor_t minor; 689 xdf_t *vdp; 690 int part; 691 ulong_t parbit; 692 693 minor = getminor(dev); 694 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 695 return (ENXIO); 696 697 mutex_enter(&vdp->xdf_dev_lk); 698 part = XDF_PART(minor); 699 if (!xdf_isopen(vdp, part)) { 700 mutex_exit(&vdp->xdf_dev_lk); 701 return (ENXIO); 702 } 703 parbit = 1 << part; 704 705 if (otyp == OTYP_LYR) { 706 if (vdp->xdf_vd_lyropen[part] != 0) 707 vdp->xdf_vd_lyropen[part]--; 708 if (vdp->xdf_vd_lyropen[part] == 0) 709 vdp->xdf_vd_open[OTYP_LYR] &= ~parbit; 710 } else { 711 vdp->xdf_vd_open[otyp] &= ~parbit; 712 } 713 vdp->xdf_vd_exclopen &= ~parbit; 714 715 mutex_exit(&vdp->xdf_dev_lk); 716 return (0); 717 } 718 719 static int 720 xdf_strategy(struct buf *bp) 721 { 722 xdf_t *vdp; 723 minor_t minor; 724 diskaddr_t p_blkct, p_blkst; 725 ulong_t nblks; 726 int part; 727 728 minor = getminor(bp->b_edev); 729 part = XDF_PART(minor); 730 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) || 731 !xdf_isopen(vdp, part) || 732 cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 733 &p_blkst, NULL, NULL, NULL)) { 734 bioerror(bp, ENXIO); 735 bp->b_resid = bp->b_bcount; 736 biodone(bp); 737 return (0); 738 } 739 740 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 741 bioerror(bp, EROFS); 742 bp->b_resid = bp->b_bcount; 743 biodone(bp); 744 return (0); 745 } 746 747 /* 748 * starting beyond partition 749 */ 750 if (bp->b_blkno > p_blkct) { 751 DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, 752 (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 753 bioerror(bp, EINVAL); 754 bp->b_resid = bp->b_bcount; 755 biodone(bp); 756 return (0); 757 } 758 759 /* Legacy: don't set error flag at this case */ 760 if (bp->b_blkno == p_blkct) { 761 bp->b_resid = bp->b_bcount; 762 biodone(bp); 763 return (0); 764 } 765 766 /* 767 * adjust for partial transfer 768 */ 769 nblks = 
bp->b_bcount >> XB_BSHIFT; 770 if ((bp->b_blkno + nblks) > p_blkct) { 771 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 772 bp->b_bcount -= bp->b_resid; 773 } 774 775 776 DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", 777 (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 778 779 mutex_enter(&vdp->xdf_dev_lk); 780 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 781 if (vdp->xdf_f_act == NULL) { 782 vdp->xdf_f_act = vdp->xdf_l_act = bp; 783 } else { 784 vdp->xdf_l_act->av_forw = bp; 785 vdp->xdf_l_act = bp; 786 } 787 bp->av_forw = NULL; 788 bp->av_back = NULL; /* not tagged with a v_req */ 789 bp->b_private = (void *)(uintptr_t)p_blkst; 790 mutex_exit(&vdp->xdf_dev_lk); 791 xdf_iostart(vdp); 792 if (do_polled_io) 793 (void) xdf_drain_io(vdp); 794 return (0); 795 } 796 797 /*ARGSUSED*/ 798 static int 799 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 800 { 801 802 xdf_t *vdp; 803 minor_t minor; 804 diskaddr_t p_blkcnt; 805 int part; 806 807 minor = getminor(dev); 808 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 809 return (ENXIO); 810 811 DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", 812 (int64_t)uiop->uio_offset)); 813 814 part = XDF_PART(minor); 815 if (!xdf_isopen(vdp, part)) 816 return (ENXIO); 817 818 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 819 NULL, NULL, NULL, NULL)) 820 return (ENXIO); 821 822 if (U_INVAL(uiop)) 823 return (EINVAL); 824 825 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 826 } 827 828 /*ARGSUSED*/ 829 static int 830 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 831 { 832 xdf_t *vdp; 833 minor_t minor; 834 diskaddr_t p_blkcnt; 835 int part; 836 837 minor = getminor(dev); 838 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 839 return (ENXIO); 840 841 DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", 842 (int64_t)uiop->uio_offset)); 843 844 part = XDF_PART(minor); 845 if (!xdf_isopen(vdp, part)) 846 return (ENXIO); 847 848 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 849 NULL, NULL, NULL, NULL)) 850 return (ENXIO); 851 852 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 853 return (ENOSPC); 854 855 if (U_INVAL(uiop)) 856 return (EINVAL); 857 858 return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); 859 } 860 861 /*ARGSUSED*/ 862 static int 863 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 864 { 865 xdf_t *vdp; 866 minor_t minor; 867 struct uio *uiop = aiop->aio_uio; 868 diskaddr_t p_blkcnt; 869 int part; 870 871 minor = getminor(dev); 872 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 873 return (ENXIO); 874 875 part = XDF_PART(minor); 876 if (!xdf_isopen(vdp, part)) 877 return (ENXIO); 878 879 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 880 NULL, NULL, NULL, NULL)) 881 return (ENXIO); 882 883 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 884 return (ENOSPC); 885 886 if (U_INVAL(uiop)) 887 return (EINVAL); 888 889 return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); 890 } 891 892 /*ARGSUSED*/ 893 static int 894 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 895 { 896 xdf_t *vdp; 897 minor_t minor; 898 struct uio *uiop = aiop->aio_uio; 899 diskaddr_t p_blkcnt; 900 int part; 901 902 minor = getminor(dev); 903 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 904 return (ENXIO); 905 906 part = XDF_PART(minor); 907 if (!xdf_isopen(vdp, part)) 908 return (ENXIO); 909 910 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 911 NULL, NULL, NULL, NULL)) 912 return (ENXIO); 913 914 if 
(uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 915 return (ENOSPC); 916 917 if (U_INVAL(uiop)) 918 return (EINVAL); 919 920 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); 921 } 922 923 static int 924 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 925 { 926 struct buf dumpbuf, *dbp; 927 xdf_t *vdp; 928 minor_t minor; 929 int err = 0; 930 int part; 931 diskaddr_t p_blkcnt, p_blkst; 932 933 minor = getminor(dev); 934 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 935 return (ENXIO); 936 937 DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", 938 addr, blkno, nblk)); 939 940 part = XDF_PART(minor); 941 if (!xdf_isopen(vdp, part)) 942 return (ENXIO); 943 944 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 945 NULL, NULL, NULL)) 946 return (ENXIO); 947 948 if ((blkno + nblk) > p_blkcnt) { 949 cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, 950 blkno + nblk, (uint64_t)vdp->xdf_xdev_nblocks); 951 return (EINVAL); 952 } 953 954 dbp = &dumpbuf; 955 bioinit(dbp); 956 dbp->b_flags = B_BUSY; 957 dbp->b_un.b_addr = addr; 958 dbp->b_bcount = nblk << DEV_BSHIFT; 959 dbp->b_resid = 0; 960 dbp->b_blkno = blkno; 961 dbp->b_edev = dev; 962 dbp->b_private = (void *)(uintptr_t)p_blkst; 963 964 mutex_enter(&vdp->xdf_dev_lk); 965 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 966 if (vdp->xdf_f_act == NULL) { 967 vdp->xdf_f_act = vdp->xdf_l_act = dbp; 968 } else { 969 vdp->xdf_l_act->av_forw = dbp; 970 vdp->xdf_l_act = dbp; 971 } 972 dbp->av_forw = NULL; 973 dbp->av_back = NULL; 974 mutex_exit(&vdp->xdf_dev_lk); 975 xdf_iostart(vdp); 976 err = xdf_drain_io(vdp); 977 biofini(dbp); 978 return (err); 979 } 980 981 /*ARGSUSED*/ 982 static int 983 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 984 int *rvalp) 985 { 986 int instance; 987 xdf_t *vdp; 988 minor_t minor; 989 int part; 990 991 minor = getminor(dev); 992 instance = XDF_INST(minor); 993 994 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 995 return (ENXIO); 996 997 DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", 998 instance, cmd, cmd)); 999 1000 part = XDF_PART(minor); 1001 if (!xdf_isopen(vdp, part)) 1002 return (ENXIO); 1003 1004 switch (cmd) { 1005 case DKIOCGMEDIAINFO: { 1006 struct dk_minfo media_info; 1007 1008 media_info.dki_lbsize = DEV_BSIZE; 1009 media_info.dki_capacity = vdp->xdf_xdev_nblocks; 1010 media_info.dki_media_type = DK_FIXED_DISK; 1011 1012 if (ddi_copyout(&media_info, (void *)arg, 1013 sizeof (struct dk_minfo), mode)) { 1014 return (EFAULT); 1015 } else { 1016 return (0); 1017 } 1018 } 1019 1020 case DKIOCINFO: { 1021 struct dk_cinfo info; 1022 1023 /* controller information */ 1024 if (XD_IS_CD(vdp)) 1025 info.dki_ctype = DKC_CDROM; 1026 else 1027 info.dki_ctype = DKC_VBD; 1028 1029 info.dki_cnum = 0; 1030 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 1031 1032 /* unit information */ 1033 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 1034 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 1035 info.dki_flags = DKI_FMTVOL; 1036 info.dki_partition = part; 1037 info.dki_maxtransfer = maxphys / DEV_BSIZE; 1038 info.dki_addr = 0; 1039 info.dki_space = 0; 1040 info.dki_prio = 0; 1041 info.dki_vec = 0; 1042 1043 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 1044 return (EFAULT); 1045 else 1046 return (0); 1047 } 1048 1049 case DKIOCSTATE: { 1050 enum dkio_state dkstate = DKIO_INSERTED; 1051 if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), 1052 mode) != 0) 1053 return (EFAULT); 1054 return 
(0); 1055 } 1056 1057 /* 1058 * is media removable? 1059 */ 1060 case DKIOCREMOVABLE: { 1061 int i = XD_IS_RM(vdp) ? 1 : 0; 1062 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1063 return (EFAULT); 1064 return (0); 1065 } 1066 1067 case DKIOCG_PHYGEOM: 1068 case DKIOCG_VIRTGEOM: 1069 case DKIOCGGEOM: 1070 case DKIOCSGEOM: 1071 case DKIOCGAPART: 1072 case DKIOCGVTOC: 1073 case DKIOCSVTOC: 1074 case DKIOCPARTINFO: 1075 case DKIOCGETEFI: 1076 case DKIOCSETEFI: 1077 case DKIOCPARTITION: { 1078 int rc; 1079 1080 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1081 rvalp, NULL); 1082 return (rc); 1083 } 1084 1085 case DKIOCGETWCE: 1086 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1087 sizeof (vdp->xdf_wce), mode)) 1088 return (EFAULT); 1089 return (0); 1090 case DKIOCSETWCE: 1091 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1092 sizeof (vdp->xdf_wce), mode)) 1093 return (EFAULT); 1094 return (0); 1095 case DKIOCFLUSHWRITECACHE: { 1096 int rc; 1097 struct dk_callback *dkc = (struct dk_callback *)arg; 1098 1099 if (vdp->xdf_flush_supported) { 1100 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1101 NULL, 0, 0, (void *)dev); 1102 } else if (vdp->xdf_feature_barrier && 1103 !xdf_barrier_flush_disable) { 1104 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1105 vdp->xdf_cache_flush_block, xdf_flush_block, 1106 DEV_BSIZE, (void *)dev); 1107 } else { 1108 return (ENOTTY); 1109 } 1110 if ((mode & FKIOCTL) && (dkc != NULL) && 1111 (dkc->dkc_callback != NULL)) { 1112 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1113 /* need to return 0 after calling callback */ 1114 rc = 0; 1115 } 1116 return (rc); 1117 } 1118 1119 default: 1120 return (ENOTTY); 1121 } 1122 } 1123 1124 /* 1125 * xdf interrupt handler 1126 */ 1127 static uint_t 1128 xdf_intr(caddr_t arg) 1129 { 1130 xdf_t *vdp = (xdf_t *)arg; 1131 xendev_ring_t *xbr; 1132 blkif_response_t *resp; 1133 int bioerr; 1134 uint64_t id; 1135 extern int do_polled_io; 1136 uint8_t op; 1137 uint16_t status; 1138 ddi_acc_handle_t acchdl; 1139 1140 mutex_enter(&vdp->xdf_dev_lk); 1141 1142 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1143 mutex_exit(&vdp->xdf_dev_lk); 1144 return (DDI_INTR_UNCLAIMED); 1145 } 1146 1147 acchdl = vdp->xdf_xb_ring_hdl; 1148 1149 /* 1150 * complete all requests which have a response 1151 */ 1152 while (resp = xvdi_ring_get_response(xbr)) { 1153 id = ddi_get64(acchdl, &resp->id); 1154 op = ddi_get8(acchdl, &resp->operation); 1155 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1156 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1157 op, id, status)); 1158 1159 /* 1160 * XXPV - close connection to the backend and restart 1161 */ 1162 if (status != BLKIF_RSP_OKAY) { 1163 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 1164 ddi_get_name_addr(vdp->xdf_dip), 1165 (op == BLKIF_OP_READ) ? 
"reading" : "writing")); 1166 bioerr = EIO; 1167 } else { 1168 bioerr = 0; 1169 } 1170 1171 xdf_iofini(vdp, id, bioerr); 1172 } 1173 1174 mutex_exit(&vdp->xdf_dev_lk); 1175 1176 if (!do_polled_io) 1177 xdf_iostart(vdp); 1178 1179 return (DDI_INTR_CLAIMED); 1180 } 1181 1182 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1183 1184 /* 1185 * Snarf new data if our flush block was re-written 1186 */ 1187 static void 1188 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1189 { 1190 int nblks; 1191 boolean_t mapin; 1192 1193 if (IS_WRITE_BARRIER(vdp, bp)) 1194 return; /* write was a flush write */ 1195 1196 mapin = B_FALSE; 1197 nblks = bp->b_bcount >> DEV_BSHIFT; 1198 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1199 xdf_fbrewrites++; 1200 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1201 mapin = B_TRUE; 1202 bp_mapin(bp); 1203 } 1204 bcopy(bp->b_un.b_addr + 1205 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1206 vdp->xdf_cache_flush_block, DEV_BSIZE); 1207 if (mapin) 1208 bp_mapout(bp); 1209 } 1210 } 1211 1212 static void 1213 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1214 { 1215 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1216 v_req_t *vreq = gs->vreq; 1217 buf_t *bp = vreq->v_buf; 1218 1219 gs_free(vdp, gs); 1220 if (bioerr) 1221 bioerror(bp, bioerr); 1222 vreq->v_nslots--; 1223 if (vreq->v_nslots != 0) 1224 return; 1225 1226 XDF_UPDATE_IO_STAT(vdp, bp); 1227 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1228 1229 if (IS_ERROR(bp)) 1230 bp->b_resid = bp->b_bcount; 1231 1232 vreq_free(vdp, vreq); 1233 biodone(bp); 1234 } 1235 1236 /* 1237 * return value of xdf_prepare_rreq() 1238 * used in xdf_iostart() 1239 */ 1240 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1241 #define XF_COMP 1 /* no more I/O left in buf */ 1242 1243 static void 1244 xdf_iostart(xdf_t *vdp) 1245 { 1246 xendev_ring_t *xbr; 1247 struct buf *bp; 1248 blkif_request_t *rreq; 1249 int retval; 1250 int rreqready = 0; 1251 1252 xbr = vdp->xdf_xb_ring; 1253 1254 /* 1255 * populate the ring request(s) 1256 * 1257 * loop until there is no buf to transfer or no free slot 1258 * available in I/O ring 1259 */ 1260 mutex_enter(&vdp->xdf_dev_lk); 1261 1262 for (;;) { 1263 if (vdp->xdf_status != XD_READY) 1264 break; 1265 1266 /* active buf queue empty? 
*/ 1267 if ((bp = vdp->xdf_f_act) == NULL) 1268 break; 1269 1270 /* try to grab a vreq for this bp */ 1271 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1272 break; 1273 /* alloc DMA/GTE resources */ 1274 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1275 break; 1276 1277 /* get next blkif_request in the ring */ 1278 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1279 break; 1280 bzero(rreq, sizeof (blkif_request_t)); 1281 1282 /* populate blkif_request with this buf */ 1283 rreqready++; 1284 retval = xdf_prepare_rreq(vdp, bp, rreq); 1285 if (retval == XF_COMP) { 1286 /* finish this bp, switch to next one */ 1287 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1288 vdp->xdf_f_act = bp->av_forw; 1289 bp->av_forw = NULL; 1290 } 1291 } 1292 1293 /* 1294 * Send the request(s) to the backend 1295 */ 1296 if (rreqready) { 1297 if (xvdi_ring_push_request(xbr)) { 1298 DPRINTF(IO_DBG, ("xdf_iostart: " 1299 "sent request(s) to backend\n")); 1300 xvdi_notify_oe(vdp->xdf_dip); 1301 } 1302 } 1303 1304 mutex_exit(&vdp->xdf_dev_lk); 1305 } 1306 1307 /* 1308 * populate a single blkif_request_t w/ a buf 1309 */ 1310 static int 1311 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1312 { 1313 int rval; 1314 grant_ref_t gr; 1315 uint8_t fsect, lsect; 1316 size_t bcnt; 1317 paddr_t dma_addr; 1318 off_t blk_off; 1319 dev_info_t *dip = vdp->xdf_dip; 1320 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1321 v_req_t *vreq = BP2VREQ(bp); 1322 uint64_t blkno = vreq->v_blkno; 1323 uint_t ndmacs = vreq->v_ndmacs; 1324 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1325 int seg = 0; 1326 int isread = IS_READ(bp); 1327 1328 if (isread) 1329 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1330 else { 1331 switch (vreq->v_flush_diskcache) { 1332 case FLUSH_DISKCACHE: 1333 ddi_put8(acchdl, &rreq->operation, 1334 BLKIF_OP_FLUSH_DISKCACHE); 1335 ddi_put16(acchdl, &rreq->handle, vdev); 1336 ddi_put64(acchdl, &rreq->id, 1337 (uint64_t)(uintptr_t)(vreq->v_gs)); 1338 ddi_put8(acchdl, &rreq->nr_segments, 0); 1339 return (XF_COMP); 1340 case WRITE_BARRIER: 1341 ddi_put8(acchdl, &rreq->operation, 1342 BLKIF_OP_WRITE_BARRIER); 1343 break; 1344 default: 1345 if (!vdp->xdf_wce) 1346 ddi_put8(acchdl, &rreq->operation, 1347 BLKIF_OP_WRITE_BARRIER); 1348 else 1349 ddi_put8(acchdl, &rreq->operation, 1350 BLKIF_OP_WRITE); 1351 break; 1352 } 1353 } 1354 1355 ddi_put16(acchdl, &rreq->handle, vdev); 1356 ddi_put64(acchdl, &rreq->sector_number, blkno); 1357 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1358 1359 /* 1360 * loop until all segments are populated or no more dma cookie in buf 1361 */ 1362 for (;;) { 1363 /* 1364 * Each segment of a blkif request can transfer up to 1365 * one 4K page of data. 
1366 */ 1367 bcnt = vreq->v_dmac.dmac_size; 1368 ASSERT(bcnt <= PAGESIZE); 1369 ASSERT((bcnt % XB_BSIZE) == 0); 1370 dma_addr = vreq->v_dmac.dmac_laddress; 1371 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1372 ASSERT((blk_off & XB_BMASK) == 0); 1373 fsect = blk_off >> XB_BSHIFT; 1374 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1375 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1376 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1377 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1378 seg, vreq->v_dmac.dmac_size, blk_off)); 1379 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1380 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1381 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1382 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1383 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1384 "\n", seg, fsect, lsect, gr, dma_addr)); 1385 1386 blkno += (bcnt >> XB_BSHIFT); 1387 seg++; 1388 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1389 if (--ndmacs) { 1390 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1391 continue; 1392 } 1393 1394 vreq->v_status = VREQ_DMAWIN_DONE; 1395 vreq->v_blkno = blkno; 1396 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1397 /* last win */ 1398 rval = XF_COMP; 1399 else 1400 rval = XF_PARTIAL; 1401 break; 1402 } 1403 ddi_put8(acchdl, &rreq->nr_segments, seg); 1404 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1405 rreq->id)); 1406 1407 return (rval); 1408 } 1409 1410 #define XDF_QSEC 50000 /* .005 second */ 1411 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1412 1413 static int 1414 xdf_drain_io(xdf_t *vdp) 1415 { 1416 int pollc, rval; 1417 xendev_ring_t *xbr; 1418 1419 if (xdfdebug & SUSRES_DBG) 1420 xen_printf("xdf_drain_io: start\n"); 1421 1422 mutex_enter(&vdp->xdf_dev_lk); 1423 1424 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1425 goto out; 1426 1427 rval = 0; 1428 xbr = vdp->xdf_xb_ring; 1429 ASSERT(xbr != NULL); 1430 1431 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1432 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1433 mutex_exit(&vdp->xdf_dev_lk); 1434 (void) xdf_intr((caddr_t)vdp); 1435 mutex_enter(&vdp->xdf_dev_lk); 1436 } 1437 if (!xvdi_ring_has_incomp_request(xbr)) 1438 goto out; 1439 1440 (void) HYPERVISOR_yield(); 1441 /* 1442 * file-backed devices can be slow 1443 */ 1444 drv_usecwait(XDF_QSEC << pollc); 1445 } 1446 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1447 rval = EIO; 1448 out: 1449 mutex_exit(&vdp->xdf_dev_lk); 1450 if (xdfdebug & SUSRES_DBG) 1451 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1452 return (rval); 1453 } 1454 1455 /* ARGSUSED5 */ 1456 static int 1457 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1458 diskaddr_t start, size_t reqlen, void *tg_cookie) 1459 { 1460 xdf_t *vdp; 1461 struct buf *bp; 1462 int err = 0; 1463 1464 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1465 if (vdp == NULL) 1466 return (ENXIO); 1467 1468 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_xdev_nblocks) 1469 return (EINVAL); 1470 1471 bp = getrbuf(KM_SLEEP); 1472 if (cmd == TG_READ) 1473 bp->b_flags = B_BUSY | B_READ; 1474 else 1475 bp->b_flags = B_BUSY | B_WRITE; 1476 bp->b_un.b_addr = bufp; 1477 bp->b_bcount = reqlen; 1478 bp->b_resid = 0; 1479 bp->b_blkno = start; 1480 bp->av_forw = NULL; 1481 bp->av_back = NULL; 1482 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1483 1484 mutex_enter(&vdp->xdf_dev_lk); 1485 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1486 if (vdp->xdf_f_act == NULL) { 1487 vdp->xdf_f_act = 
vdp->xdf_l_act = bp; 1488 } else { 1489 vdp->xdf_l_act->av_forw = bp; 1490 vdp->xdf_l_act = bp; 1491 } 1492 mutex_exit(&vdp->xdf_dev_lk); 1493 xdf_iostart(vdp); 1494 err = biowait(bp); 1495 1496 ASSERT(bp->b_flags & B_DONE); 1497 1498 freerbuf(bp); 1499 return (err); 1500 } 1501 1502 /* 1503 * synthetic geometry 1504 */ 1505 #define XDF_NSECTS 256 1506 #define XDF_NHEADS 16 1507 1508 static int 1509 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1510 { 1511 xdf_t *vdp; 1512 1513 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1514 1515 if (vdp == NULL) 1516 return (ENXIO); 1517 1518 mutex_enter(&vdp->xdf_dev_lk); 1519 *capp = vdp->xdf_xdev_nblocks; 1520 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1521 mutex_exit(&vdp->xdf_dev_lk); 1522 return (0); 1523 } 1524 1525 static int 1526 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1527 { 1528 xdf_t *vdp; 1529 uint_t ncyl; 1530 uint_t spc = XDF_NHEADS * XDF_NSECTS; 1531 1532 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1533 1534 if (vdp == NULL) 1535 return (ENXIO); 1536 1537 ncyl = vdp->xdf_xdev_nblocks / spc; 1538 1539 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1540 geomp->g_acyl = 0; 1541 geomp->g_nhead = XDF_NHEADS; 1542 geomp->g_secsize = XB_BSIZE; 1543 geomp->g_nsect = XDF_NSECTS; 1544 geomp->g_intrlv = 0; 1545 geomp->g_rpm = 7200; 1546 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1547 return (0); 1548 } 1549 1550 /* 1551 * No real HBA, no geometry available from it 1552 */ 1553 /*ARGSUSED*/ 1554 static int 1555 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1556 { 1557 return (EINVAL); 1558 } 1559 1560 static int 1561 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1562 { 1563 xdf_t *vdp; 1564 1565 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1566 return (ENXIO); 1567 1568 if (XD_IS_RO(vdp)) 1569 tgattributep->media_is_writable = 0; 1570 else 1571 tgattributep->media_is_writable = 1; 1572 return (0); 1573 } 1574 1575 /* ARGSUSED3 */ 1576 static int 1577 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1578 { 1579 switch (cmd) { 1580 case TG_GETPHYGEOM: 1581 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1582 case TG_GETVIRTGEOM: 1583 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1584 case TG_GETCAPACITY: 1585 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1586 case TG_GETBLOCKSIZE: 1587 *(uint32_t *)arg = XB_BSIZE; 1588 return (0); 1589 case TG_GETATTR: 1590 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1591 default: 1592 return (ENOTTY); 1593 } 1594 } 1595 1596 /* 1597 * Kick-off connect process 1598 * Status should be XD_UNKNOWN or XD_CLOSED 1599 * On success, status will be changed to XD_INIT 1600 * On error, status won't be changed 1601 */ 1602 static int 1603 xdf_start_connect(xdf_t *vdp) 1604 { 1605 char *xsnode; 1606 grant_ref_t gref; 1607 xenbus_transaction_t xbt; 1608 int rv; 1609 dev_info_t *dip = vdp->xdf_dip; 1610 1611 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1612 goto errout; 1613 1614 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1615 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1616 ddi_get_name_addr(dip)); 1617 goto errout; 1618 } 1619 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1620 DDI_SUCCESS) { 1621 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1622 "failed to add intr handler", ddi_get_name_addr(dip)); 1623 goto errout1; 1624 } 1625 1626 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1627 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1628 
DDI_SUCCESS) { 1629 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1630 ddi_get_name_addr(dip)); 1631 goto errout2; 1632 } 1633 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1634 1635 /* 1636 * Write into xenstore the info needed by backend 1637 */ 1638 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1639 cmn_err(CE_WARN, "xdf@%s: " 1640 "failed to get xenstore node path", 1641 ddi_get_name_addr(dip)); 1642 goto fail_trans; 1643 } 1644 trans_retry: 1645 if (xenbus_transaction_start(&xbt)) { 1646 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1647 ddi_get_name_addr(dip)); 1648 xvdi_fatal_error(dip, EIO, "transaction start"); 1649 goto fail_trans; 1650 } 1651 1652 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1653 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1654 ddi_get_name_addr(dip)); 1655 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1656 goto abort_trans; 1657 } 1658 1659 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1660 xvdi_get_evtchn(dip))) { 1661 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1662 ddi_get_name_addr(dip)); 1663 xvdi_fatal_error(dip, rv, "writing event-channel"); 1664 goto abort_trans; 1665 } 1666 1667 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1668 cmn_err(CE_WARN, "xdf@%s: " 1669 "failed to switch state to XenbusStateInitialised", 1670 ddi_get_name_addr(dip)); 1671 xvdi_fatal_error(dip, rv, "writing state"); 1672 goto abort_trans; 1673 } 1674 1675 /* kick-off connect process */ 1676 if (rv = xenbus_transaction_end(xbt, 0)) { 1677 if (rv == EAGAIN) 1678 goto trans_retry; 1679 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1680 ddi_get_name_addr(dip)); 1681 xvdi_fatal_error(dip, rv, "completing transaction"); 1682 goto fail_trans; 1683 } 1684 1685 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1686 mutex_enter(&vdp->xdf_dev_lk); 1687 vdp->xdf_status = XD_INIT; 1688 mutex_exit(&vdp->xdf_dev_lk); 1689 1690 return (DDI_SUCCESS); 1691 1692 abort_trans: 1693 (void) xenbus_transaction_end(xbt, 1); 1694 fail_trans: 1695 xvdi_free_ring(vdp->xdf_xb_ring); 1696 errout2: 1697 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1698 errout1: 1699 xvdi_free_evtchn(dip); 1700 errout: 1701 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1702 ddi_get_name_addr(dip)); 1703 return (DDI_FAILURE); 1704 } 1705 1706 /* 1707 * Kick-off disconnect process 1708 * Status won't be changed 1709 */ 1710 static int 1711 xdf_start_disconnect(xdf_t *vdp) 1712 { 1713 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1714 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1715 ddi_get_name_addr(vdp->xdf_dip)); 1716 return (DDI_FAILURE); 1717 } 1718 1719 return (DDI_SUCCESS); 1720 } 1721 1722 int 1723 xdf_get_flush_block(xdf_t *vdp) 1724 { 1725 /* 1726 * Get a DEV_BSIZE aligned buffer 1727 */ 1728 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1729 vdp->xdf_cache_flush_block = 1730 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1731 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1732 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1733 return (DDI_FAILURE); 1734 return (DDI_SUCCESS); 1735 } 1736 1737 /* 1738 * Finish other initialization after we've connected to backend 1739 * Status should be XD_INIT before calling this routine 1740 * On success, status should be changed to XD_READY 1741 * On error, status should stay XD_INIT 1742 */ 1743 static int 1744 xdf_post_connect(xdf_t *vdp) 1745 { 1746 int rv; 1747 uint_t len;
1748 char *type; 1749 char *barrier; 1750 dev_info_t *devi = vdp->xdf_dip; 1751 1752 /* 1753 * Determine if feature barrier is supported by backend 1754 */ 1755 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1756 "feature-barrier", (void **)&barrier, &len) == 0) { 1757 vdp->xdf_feature_barrier = 1; 1758 kmem_free(barrier, len); 1759 } else { 1760 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1761 ddi_get_name_addr(vdp->xdf_dip)); 1762 vdp->xdf_feature_barrier = 0; 1763 } 1764 1765 /* probe backend */ 1766 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1767 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1768 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1769 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1770 "cannot read backend info", ddi_get_name_addr(devi)); 1771 xvdi_fatal_error(devi, rv, "reading backend info"); 1772 return (DDI_FAILURE); 1773 } 1774 1775 /* fix disk type */ 1776 if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", 1777 (void **)&type, &len) != 0) { 1778 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1779 "cannot read device-type", ddi_get_name_addr(devi)); 1780 xvdi_fatal_error(devi, rv, "reading device-type"); 1781 return (DDI_FAILURE); 1782 } 1783 if (strcmp(type, "cdrom") == 0) 1784 vdp->xdf_xdev_info |= VDISK_CDROM; 1785 kmem_free(type, len); 1786 1787 /* 1788 * We've created all the minor nodes via cmlb_attach() using default 1789 * value in xdf_attach() to make it possible to block in xdf_open(), 1790 * in case there's anyone (say, booting thread) ever trying to open 1791 * it before connected to backend. We will refresh all those minor 1792 * nodes w/ latest info we've got now when we are almost connected. 1793 * 1794 * Don't do this when xdf is already opened by someone (could happen 1795 * during resume), because cmlb_attach() will invalidate the label info 1796 * and confuse those who have already opened the node, which is bad. 1797 */ 1798 if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { 1799 /* re-init cmlb w/ latest info we got from backend */ 1800 if (cmlb_attach(devi, &xdf_lb_ops, 1801 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 1802 XD_IS_RM(vdp), 1, DDI_NT_BLOCK, 1803 CMLB_FAKE_LABEL_ONE_PARTITION, 1804 vdp->xdf_vd_lbl, NULL) != 0) { 1805 cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", 1806 ddi_get_name_addr(devi)); 1807 return (DDI_FAILURE); 1808 } 1809 } 1810 1811 /* mark vbd as ready for I/O */ 1812 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1813 mutex_enter(&vdp->xdf_dev_lk); 1814 vdp->xdf_status = XD_READY; 1815 mutex_exit(&vdp->xdf_dev_lk); 1816 /* 1817 * If backend has feature-barrier, see if it supports disk 1818 * cache flush op. 1819 */ 1820 vdp->xdf_flush_supported = 0; 1821 if (vdp->xdf_feature_barrier) { 1822 /* 1823 * Pretend we already know flush is supported so probe 1824 * will attempt the correct op. 1825 */ 1826 vdp->xdf_flush_supported = 1; 1827 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1828 vdp->xdf_flush_supported = 1; 1829 } else { 1830 vdp->xdf_flush_supported = 0; 1831 /* 1832 * If the other end does not support the cache flush op 1833 * then we must use a barrier-write to force disk 1834 * cache flushing. Barrier writes require that a data 1835 * block actually be written. 1836 * Cache a block to barrier-write when we are 1837 * asked to perform a flush. 1838 * XXX - would it be better to just copy 1 block 1839 * (512 bytes) from whatever write we did last 1840 * and rewrite that block?
1841 */ 1842 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1843 return (DDI_FAILURE); 1844 } 1845 } 1846 1847 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 1848 (uint64_t)vdp->xdf_xdev_nblocks); 1849 1850 return (DDI_SUCCESS); 1851 } 1852 1853 /* 1854 * Finish other uninitialization after we've disconnected from backend 1855 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 1856 */ 1857 static void 1858 xdf_post_disconnect(xdf_t *vdp) 1859 { 1860 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1861 xvdi_free_evtchn(vdp->xdf_dip); 1862 xvdi_free_ring(vdp->xdf_xb_ring); 1863 vdp->xdf_xb_ring = NULL; 1864 vdp->xdf_xb_ring_hdl = NULL; 1865 vdp->xdf_peer = (domid_t)-1; 1866 1867 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1868 mutex_enter(&vdp->xdf_dev_lk); 1869 vdp->xdf_status = XD_CLOSED; 1870 mutex_exit(&vdp->xdf_dev_lk); 1871 } 1872 1873 /*ARGSUSED*/ 1874 static void 1875 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1876 { 1877 XenbusState new_state = *(XenbusState *)impl_data; 1878 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1879 boolean_t unexpect_die = B_FALSE; 1880 int status; 1881 1882 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1883 ddi_get_name_addr(dip), new_state)); 1884 1885 mutex_enter(&vdp->xdf_cb_lk); 1886 1887 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 1888 mutex_exit(&vdp->xdf_cb_lk); 1889 return; 1890 } 1891 1892 switch (new_state) { 1893 case XenbusStateInitialising: 1894 ASSERT(vdp->xdf_status == XD_CLOSED); 1895 /* 1896 * backend recovered from a previous failure, 1897 * kick-off connect process again 1898 */ 1899 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 1900 cmn_err(CE_WARN, "xdf@%s:" 1901 " failed to start reconnecting to backend", 1902 ddi_get_name_addr(dip)); 1903 } 1904 break; 1905 case XenbusStateConnected: 1906 ASSERT(vdp->xdf_status == XD_INIT); 1907 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1908 /* finish final init after connect */ 1909 if (xdf_post_connect(vdp) != DDI_SUCCESS) 1910 (void) xdf_start_disconnect(vdp); 1911 break; 1912 case XenbusStateClosing: 1913 if (vdp->xdf_status == XD_READY) { 1914 mutex_enter(&vdp->xdf_dev_lk); 1915 if (xdf_isopen(vdp, -1)) { 1916 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 1917 "still in use", ddi_get_name_addr(dip)); 1918 mutex_exit(&vdp->xdf_dev_lk); 1919 break; 1920 } else { 1921 vdp->xdf_status = XD_CLOSING; 1922 } 1923 mutex_exit(&vdp->xdf_dev_lk); 1924 } 1925 (void) xdf_start_disconnect(vdp); 1926 break; 1927 case XenbusStateClosed: 1928 /* first check if BE closed unexpectedly */ 1929 mutex_enter(&vdp->xdf_dev_lk); 1930 if (xdf_isopen(vdp, -1)) { 1931 unexpect_die = B_TRUE; 1932 unexpectedie(vdp); 1933 cmn_err(CE_WARN, "xdf@%s: backend closed, " 1934 "reconnecting...", ddi_get_name_addr(dip)); 1935 } 1936 mutex_exit(&vdp->xdf_dev_lk); 1937 1938 if (vdp->xdf_status == XD_READY) { 1939 mutex_enter(&vdp->xdf_dev_lk); 1940 vdp->xdf_status = XD_CLOSING; 1941 mutex_exit(&vdp->xdf_dev_lk); 1942 1943 #ifdef DOMU_BACKEND 1944 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1945 #endif 1946 1947 xdf_post_disconnect(vdp); 1948 (void) xvdi_switch_state(dip, XBT_NULL, 1949 XenbusStateClosed); 1950 } else if ((vdp->xdf_status == XD_INIT) || 1951 (vdp->xdf_status == XD_CLOSING)) { 1952 xdf_post_disconnect(vdp); 1953 } else { 1954 mutex_enter(&vdp->xdf_dev_lk); 1955 vdp->xdf_status = XD_CLOSED; 1956 mutex_exit(&vdp->xdf_dev_lk); 1957 } 1958 } 1959 1960 /* notify anybody waiting for oe 
state change */ 1961 mutex_enter(&vdp->xdf_dev_lk); 1962 cv_broadcast(&vdp->xdf_dev_cv); 1963 mutex_exit(&vdp->xdf_dev_lk); 1964 1965 status = vdp->xdf_status; 1966 mutex_exit(&vdp->xdf_cb_lk); 1967 1968 if (status == XD_READY) { 1969 xdf_iostart(vdp); 1970 } else if ((status == XD_CLOSED) && !unexpect_die) { 1971 /* interface is closed successfully, remove all minor nodes */ 1972 cmlb_detach(vdp->xdf_vd_lbl, NULL); 1973 cmlb_free_handle(&vdp->xdf_vd_lbl); 1974 } 1975 } 1976 1977 /* check if partition is open, -1 - check all partitions on the disk */ 1978 static boolean_t 1979 xdf_isopen(xdf_t *vdp, int partition) 1980 { 1981 int i; 1982 ulong_t parbit; 1983 boolean_t rval = B_FALSE; 1984 1985 if (partition == -1) 1986 parbit = (ulong_t)-1; 1987 else 1988 parbit = 1 << partition; 1989 1990 for (i = 0; i < OTYPCNT; i++) { 1991 if (vdp->xdf_vd_open[i] & parbit) 1992 rval = B_TRUE; 1993 } 1994 1995 return (rval); 1996 } 1997 1998 /* 1999 * Xdf_check_state_transition will check the XenbusState change to see 2000 * if the change is a valid transition or not. 2001 * The new state is written by backend domain, or by running xenstore-write 2002 * to change it manually in dom0 2003 */ 2004 static int 2005 xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) 2006 { 2007 int status; 2008 int stcheck; 2009 #define STOK 0 /* need further process */ 2010 #define STNOP 1 /* no action need taking */ 2011 #define STBUG 2 /* unexpected state change, could be a bug */ 2012 2013 status = vdp->xdf_status; 2014 stcheck = STOK; 2015 2016 switch (status) { 2017 case XD_UNKNOWN: 2018 if ((oestate == XenbusStateUnknown) || 2019 (oestate == XenbusStateConnected)) 2020 stcheck = STBUG; 2021 else if ((oestate == XenbusStateInitialising) || 2022 (oestate == XenbusStateInitWait) || 2023 (oestate == XenbusStateInitialised)) 2024 stcheck = STNOP; 2025 break; 2026 case XD_INIT: 2027 if (oestate == XenbusStateUnknown) 2028 stcheck = STBUG; 2029 else if ((oestate == XenbusStateInitialising) || 2030 (oestate == XenbusStateInitWait) || 2031 (oestate == XenbusStateInitialised)) 2032 stcheck = STNOP; 2033 break; 2034 case XD_READY: 2035 if ((oestate == XenbusStateUnknown) || 2036 (oestate == XenbusStateInitialising) || 2037 (oestate == XenbusStateInitWait) || 2038 (oestate == XenbusStateInitialised)) 2039 stcheck = STBUG; 2040 else if (oestate == XenbusStateConnected) 2041 stcheck = STNOP; 2042 break; 2043 case XD_CLOSING: 2044 if ((oestate == XenbusStateUnknown) || 2045 (oestate == XenbusStateInitialising) || 2046 (oestate == XenbusStateInitWait) || 2047 (oestate == XenbusStateInitialised) || 2048 (oestate == XenbusStateConnected)) 2049 stcheck = STBUG; 2050 else if (oestate == XenbusStateClosing) 2051 stcheck = STNOP; 2052 break; 2053 case XD_CLOSED: 2054 if ((oestate == XenbusStateUnknown) || 2055 (oestate == XenbusStateConnected)) 2056 stcheck = STBUG; 2057 else if ((oestate == XenbusStateInitWait) || 2058 (oestate == XenbusStateInitialised) || 2059 (oestate == XenbusStateClosing) || 2060 (oestate == XenbusStateClosed)) 2061 stcheck = STNOP; 2062 break; 2063 case XD_SUSPEND: 2064 default: 2065 stcheck = STBUG; 2066 } 2067 2068 if (stcheck == STOK) 2069 return (DDI_SUCCESS); 2070 2071 if (stcheck == STBUG) 2072 cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " 2073 "state change to %d!, when status is %d", 2074 ddi_get_name_addr(vdp->xdf_dip), oestate, status); 2075 2076 return (DDI_FAILURE); 2077 } 2078 2079 static int 2080 xdf_connect(xdf_t *vdp, boolean_t wait) 2081 { 2082 ASSERT(mutex_owned(&vdp->xdf_dev_lk)); 2083 

static int
xdf_connect(xdf_t *vdp, boolean_t wait)
{
	ASSERT(mutex_owned(&vdp->xdf_dev_lk));

	while (vdp->xdf_status != XD_READY) {
		if (!wait || (vdp->xdf_status > XD_READY))
			break;

		if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
			break;
	}

	return (vdp->xdf_status);
}

/*
 * Callback invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	ASSERT(vdp != NULL);

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    ddi_get_name_addr(vdp->xdf_dip)));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	ASSERT(vdp != NULL);

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(ISDMACBON(vdp));
	SETDMACBOFF(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* a new timeout could be re-scheduled after this point */
	xdf_iostart(vdp);
}

/*
 * Allocate a vreq for this bp.
 * bp->av_back contains the pointer to the vreq upon return.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP2VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
	bp->av_back = (buf_t *)vreq;
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	/* init of other fields in vreq is up to the caller */

	return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t *bp = vreq->v_buf;

	list_remove(&vdp->xdf_vreq_act, (void *)vreq);

	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
		goto done;

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
done:
	vreq->v_buf->av_back = NULL;
	kmem_cache_free(xdf_vreq_cache, vreq);
}
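
/*
 * Added note (editor's note, not part of the original source):
 * vreq_setup() below is written as a resumable state machine.  A request
 * normally walks the states in order,
 *
 *	VREQ_INIT -> VREQ_INIT_DONE -> VREQ_DMAHDL_ALLOCED ->
 *	VREQ_MEMDMAHDL_ALLOCED -> VREQ_DMAMEM_ALLOCED ->
 *	VREQ_DMABUF_BOUND -> VREQ_GS_ALLOCED,
 *
 * falling through each case.  If a resource allocation fails part way,
 * the vreq keeps the state it reached; a later retry (triggered by the
 * DMA/grant-table callback or by the timeout handler) re-enters the
 * switch at that state and continues from there.  VREQ_DMAWIN_DONE is
 * entered only for multi-window DMA transfers, when the next window is
 * being set up.
 */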

/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, (
				    "xdf@%s: get ge_slot failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_gs = gs;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->vreq = vreq;
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		bp->b_private = NULL;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work round CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
				    "handle alloc failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);

			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, (
				    "xdf@%s: DMA mem allocation failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get a ge_slot; the free callback is set up by gs_get()
		 * on failure, if it wasn't set previously
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_dmaw++;
		rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs);
		ASSERT(rc == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* try to alloc GTEs needed in this slot, first */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init gs_slot */
	list_insert_head(&vdp->xdf_gs_act, (void *)gs);
	gs->oeid = vdp->xdf_peer;
	gs->isread = isread;
	gs->ghead = gh;
	gs->ngrefs = 0;

	return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
	int i;
	grant_ref_t *gp = gs->ge;
	int ngrefs = gs->ngrefs;
	boolean_t isread = gs->isread;

	list_remove(&vdp->xdf_gs_act, (void *)gs);

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < ngrefs; i++, gp++)
		gnttab_end_foreign_access(*gp, !isread, 0);
	gnttab_free_grant_references(gs->ghead);

	kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

	ASSERT(gr != -1);
	ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->ge[gs->ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

	return (gr);
}

static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in ring that have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * move the bufs back onto the active list in order;
	 * vreq_busy is updated in vreq_free()
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}
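
/*
 * Added illustration (editor's note, not part of the original source):
 * xdfmin() is a minphys-style clamp on b_bcount.  Routines of this kind
 * are normally handed to physio(9F) by a character-device read or write
 * entry point, roughly as sketched below.  The entry-point name is
 * hypothetical; the sketch only shows the intended pairing of
 * xdf_strategy() and xdfmin().
 *
 *	static int
 *	xdf_read_example(dev_t dev, struct uio *uiop, cred_t *credp)
 *	{
 *		return (physio(xdf_strategy, NULL, dev, B_READ,
 *		    xdfmin, uiop));
 *	}
 */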