/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "xdf.h"

#define	FLUSH_DISKCACHE		0x1
#define	WRITE_BARRIER		0x2
#define	DEFAULT_FLUSH_BLOCK	156	/* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp) \
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp) \
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp) \
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp) \
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

static void *vbd_ss;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
int xdfdebug = 0;
extern int do_polled_io;
diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
int xdf_barrier_flush_disable = 0;

/*
 * dev_ops and cb_ops entrypoints
 */
static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
static int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
static int xdf_reset(dev_info_t *, ddi_reset_cmd_t);
static int xdf_open(dev_t *, int, int, cred_t *);
static int xdf_close(dev_t, int, int, struct cred *);
static int xdf_strategy(struct buf *);
static int xdf_read(dev_t, struct uio *, cred_t *);
static int xdf_aread(dev_t, struct aio_req *, cred_t *);
static int xdf_write(dev_t, struct uio *, cred_t *);
static int xdf_awrite(dev_t, struct aio_req *, cred_t *);
static int xdf_dump(dev_t, caddr_t, daddr_t, int);
static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static uint_t xdf_intr(caddr_t);
static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

/*
 * misc private functions
 */
static int xdf_suspend(dev_info_t *);
static int xdf_resume(dev_info_t *);
static int xdf_start_connect(xdf_t *);
static int xdf_start_disconnect(xdf_t *);
static int xdf_post_connect(xdf_t *);
static void xdf_post_disconnect(xdf_t *);
static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *);
static void xdf_iostart(xdf_t *);
static void xdf_iofini(xdf_t *, uint64_t, int);
static int xdf_prepare_rreq(xdf_t *, struct buf *,
    blkif_request_t *);
static int xdf_drain_io(xdf_t *);
static boolean_t xdf_isopen(xdf_t *, int);
static int xdf_check_state_transition(xdf_t *, XenbusState);
static int xdf_connect(xdf_t *, boolean_t);
static int xdf_dmacallback(caddr_t);
static void xdf_timeout_handler(void *);
static uint_t xdf_iorestart(caddr_t);
static v_req_t *vreq_get(xdf_t *, buf_t *);
static void vreq_free(xdf_t *, v_req_t *);
static int vreq_setup(xdf_t *, v_req_t *);
static ge_slot_t *gs_get(xdf_t *, int);
static void gs_free(xdf_t *, ge_slot_t *);
static grant_ref_t gs_grant(ge_slot_t *, mfn_t);
static void unexpectedie(xdf_t *);
static void xdfmin(struct buf *);

static struct cb_ops xdf_cbops = {
	xdf_open,
	xdf_close,
	xdf_strategy,
	nodev,
	xdf_dump,
	xdf_read,
	xdf_write,
	xdf_ioctl,
	nodev,
	nodev,
	nodev,
	nochpoll,
	xdf_prop_op,
	NULL,
	D_MP | D_NEW | D_64BIT,
	CB_REV,
	xdf_aread,
	xdf_awrite
};

struct dev_ops xdf_devops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xdf_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdf_attach,		/* devo_attach */
	xdf_detach,		/* devo_detach */
	xdf_reset,		/* devo_reset */
	&xdf_cbops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"virtual block driver %I%",	/* short description */
	&xdf_devops		/* driver specific ops */
};

static struct modlinkage xdf_modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length */
	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
	XB_BSIZE,			/* granularity */
	0,				/* flags (reserved) */
};

static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/* callbacks from common label */

static int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};

int
_init(void)
{
	int rc;

	if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) == 0) {
		xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
		    sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
		ASSERT(xdf_vreq_cache != NULL);
		xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
		    sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
		ASSERT(xdf_gs_cache != NULL);
		if ((rc = mod_install(&xdf_modlinkage)) != 0) {
			kmem_cache_destroy(xdf_vreq_cache);
			kmem_cache_destroy(xdf_gs_cache);
			ddi_soft_state_fini(&vbd_ss);
		}
	}

	return (rc);
}

int
_fini(void)
{
	int err;

	if ((err =
    mod_remove(&xdf_modlinkage)) != 0)
		return (err);

	kmem_cache_destroy(xdf_vreq_cache);
	kmem_cache_destroy(xdf_gs_cache);
	ddi_soft_state_fini(&vbd_ss);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdf_modlinkage, modinfop));
}

/*ARGSUSED*/
static int
xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
{
	int instance;
	xdf_t *vbdp;

	instance = XDF_INST(getminor((dev_t)arg));

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) {
			*rp = NULL;
			return (DDI_FAILURE);
		}
		*rp = vbdp->xdf_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*rp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	int instance = ddi_get_instance(dip);
	xdf_t *vdp;
	diskaddr_t p_blkcnt;

	/*
	 * xdf dynamic properties are device specific and size oriented.
	 * Requests issued under conditions where size is valid are passed
	 * to ddi_prop_op_nblocks with the size information, otherwise the
	 * request is passed to ddi_prop_op.
	 */
	vdp = ddi_get_soft_state(vbd_ss, instance);

	if ((dev == DDI_DEV_T_ANY) || (vdp == NULL))
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_connect(vdp, B_TRUE) != XD_READY) {
		mutex_exit(&vdp->xdf_dev_lk);
		goto out;
	}
	mutex_exit(&vdp->xdf_dev_lk);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt,
	    NULL, NULL, NULL, NULL) == 0)
		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp, (uint64_t)p_blkcnt));

out:
	return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep,
	    lengthp));
}

static int
xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	xdf_t *vdp;
	ddi_iblock_cookie_t ibc;
	ddi_iblock_cookie_t softibc;
	int instance;

	xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
	    "xdfdebug", 0);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (xdf_resume(devi));

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);
	vdp->xdf_dip = devi;
	if (ddi_get_iblock_cookie(devi, 0, &ibc) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout1;
	}

	mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
	ddi_set_driver_private(devi, vdp);

	if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout2;
	}
	if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
	    &softibc, NULL, xdf_iorestart,
    (caddr_t)vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
		    ddi_get_name_addr(devi));
		goto errout2;
	}

	/*
	 * create kstat for iostat(1M)
	 */
	if ((vdp->xdf_xdev_iostat = kstat_create("xdf", instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) {
		vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
		kstat_install(vdp->xdf_xdev_iostat);
	} else {
		cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
		    ddi_get_name_addr(devi));
		goto errout3;
	}

	/*
	 * driver handles kernel-issued IOCTLs
	 */
	if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop",
		    ddi_get_name_addr(devi));
		goto errout4;
	}

	/*
	 * create default device minor nodes: non-removable disk
	 * we will adjust minor nodes after we are connected w/ backend
	 */
	cmlb_alloc_handle(&vdp->xdf_vd_lbl);
	if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, vdp->xdf_vd_lbl, NULL) != 0) {
		cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed",
		    ddi_get_name_addr(devi));
		goto errout5;
	}

	/*
	 * We ship with cache-enabled disks
	 */
	vdp->xdf_wce = 1;

	mutex_enter(&vdp->xdf_cb_lk);

	/* Watch backend XenbusState change */
	if (xvdi_add_event_handler(devi, XS_OE_STATE,
	    xdf_oe_change) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout6;
	}

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: start connection failed",
		    ddi_get_name_addr(devi));
		(void) xdf_start_disconnect(vdp);
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout7;
	}

	mutex_exit(&vdp->xdf_cb_lk);

	list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
	    offsetof(v_req_t, v_link));
	list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t),
	    offsetof(ge_slot_t, link));

	ddi_report_dev(devi);
	DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance));

	return (DDI_SUCCESS);

errout7:
	xvdi_remove_event_handler(devi, XS_OE_STATE);
errout6:
	cmlb_detach(vdp->xdf_vd_lbl, NULL);
errout5:
	cmlb_free_handle(&vdp->xdf_vd_lbl);
	ddi_prop_remove_all(devi);
errout4:
	kstat_delete(vdp->xdf_xdev_iostat);
errout3:
	ddi_remove_softintr(vdp->xdf_softintr_id);
errout2:
	ddi_set_driver_private(devi, NULL);
	cv_destroy(&vdp->xdf_dev_cv);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
errout1:
	cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi));
	ddi_soft_state_free(vbd_ss, instance);
	return (DDI_FAILURE);
}

static int
xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	switch (cmd) {

	case DDI_PM_SUSPEND:
		break;

	case DDI_SUSPEND:
		return (xdf_suspend(devi));

	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);

	if (vdp == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_isopen(vdp, -1)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

	if (vdp->xdf_status != XD_CLOSED) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}
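	/*
	 * The device is closed and no partition is open, and there must be
	 * no pending DMA-resource callback; it is now safe to cancel the
	 * I/O restart timeout and tear down the event handler, soft
	 * interrupt, kstat, request lists, locks and cached flush block.
	 */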

	ASSERT(!ISDMACBON(vdp));
	mutex_exit(&vdp->xdf_dev_lk);

	if (vdp->xdf_timeout_id != 0)
		(void) untimeout(vdp->xdf_timeout_id);

	xvdi_remove_event_handler(devi, XS_OE_STATE);

	/* we'll support backend running in domU later */
#ifdef DOMU_BACKEND
	(void) xvdi_post_event(devi, XEN_HP_REMOVE);
#endif

	list_destroy(&vdp->xdf_vreq_act);
	list_destroy(&vdp->xdf_gs_act);
	ddi_prop_remove_all(devi);
	kstat_delete(vdp->xdf_xdev_iostat);
	ddi_remove_softintr(vdp->xdf_softintr_id);
	ddi_set_driver_private(devi, NULL);
	cv_destroy(&vdp->xdf_dev_cv);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
	if (vdp->xdf_cache_flush_block != NULL)
		kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
	ddi_soft_state_free(vbd_ss, instance);
	return (DDI_SUCCESS);
}

static int
xdf_suspend(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: xdf#%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	xvdi_suspend(devi);

	/* stop further I/O requests */
	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_SUSPEND;
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	/* make sure no more I/O responses left in the ring buffer */
	(void) ddi_remove_intr(devi, 0, NULL);
	(void) xdf_drain_io(vdp);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: SUCCESS\n");

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_resume(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: xdf%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_cb_lk);

	if (xvdi_resume(devi) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(vdp->xdf_status == XD_SUSPEND);
	vdp->xdf_status = XD_UNKNOWN;
	mutex_exit(&vdp->xdf_dev_lk);

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_exit(&vdp->xdf_cb_lk);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: done\n");
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance));
	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	/*
	 * wait for any outstanding I/O to complete
	 */
	(void) xdf_drain_io(vdp);

	DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance));
	return (DDI_SUCCESS);
}

static int
xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t minor;
	xdf_t *vdp;
	int part;
	ulong_t parbit;
	diskaddr_t p_blkct = 0;
	boolean_t firstopen;

	minor = getminor(*devp);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor)));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	if
    (xdf_connect(vdp, B_TRUE) != XD_READY) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}

	if ((flag & FWRITE) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EROFS);
	}

	part = XDF_PART(minor);
	parbit = 1 << part;
	if (vdp->xdf_vd_exclopen & parbit) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EBUSY);
	}

	/* are we the first one to open this node? */
	firstopen = !xdf_isopen(vdp, -1);

	if ((flag & FEXCL) && !firstopen) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EBUSY);
	}

	if (otyp == OTYP_LYR)
		vdp->xdf_vd_lyropen[part]++;

	vdp->xdf_vd_open[otyp] |= parbit;

	if (flag & FEXCL)
		vdp->xdf_vd_exclopen |= parbit;

	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	if (firstopen)
		cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	/*
	 * check size
	 * ignore CD/DVD which contains a zero-sized s0
	 */
	if (!(flag & (FNDELAY | FNONBLOCK)) && !XD_IS_CD(vdp) &&
	    ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
	    NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) {
		(void) xdf_close(*devp, flag, otyp, credp);
		return (ENXIO);
	}

	return (0);
}

/*ARGSUSED*/
static int
xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t minor;
	xdf_t *vdp;
	int part;
	ulong_t parbit;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}
	parbit = 1 << part;

	if (otyp == OTYP_LYR) {
		if (vdp->xdf_vd_lyropen[part] != 0)
			vdp->xdf_vd_lyropen[part]--;
		if (vdp->xdf_vd_lyropen[part] == 0)
			vdp->xdf_vd_open[OTYP_LYR] &= ~parbit;
	} else {
		vdp->xdf_vd_open[otyp] &= ~parbit;
	}
	vdp->xdf_vd_exclopen &= ~parbit;

	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_strategy(struct buf *bp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkct, p_blkst;
	ulong_t nblks;
	int part;

	minor = getminor(bp->b_edev);
	part = XDF_PART(minor);
	if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) ||
	    !xdf_isopen(vdp, part) ||
	    cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
	    &p_blkst, NULL, NULL, NULL)) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
		bioerror(bp, EROFS);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/*
	 * starting beyond partition
	 */
	if (bp->b_blkno > p_blkct) {
		DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64,
		    (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
		bioerror(bp, EINVAL);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Legacy: don't set error flag at this case */
	if (bp->b_blkno == p_blkct) {
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/*
	 * adjust for partial transfer
	 */
	nblks = bp->b_bcount >> XB_BSHIFT;
	if ((bp->b_blkno + nblks) > p_blkct) {
		bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
		bp->b_bcount -= bp->b_resid;
	}


	DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n",
	    (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));

	mutex_enter(&vdp->xdf_dev_lk);
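	/*
	 * Under xdf_dev_lk: account this buf in the kstat wait queue and
	 * append it to the tail of the active request list (xdf_f_act/
	 * xdf_l_act).  xdf_iostart() pulls requests from that list and
	 * issues them to the backend I/O ring.
	 */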
	kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	bp->av_forw = NULL;
	bp->av_back = NULL;	/* not tagged with a v_req */
	bp->b_private = (void *)(uintptr_t)p_blkst;
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	if (do_polled_io)
		(void) xdf_drain_io(vdp);
	return (0);
}

/*ARGSUSED*/
static int
xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{

	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
}

/*ARGSUSED*/
static int
xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop));
}

/*ARGSUSED*/
static int
xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop));
}

/*ARGSUSED*/
static int
xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop));
}

static int
xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	struct buf dumpbuf, *dbp;
	xdf_t *vdp;
	minor_t minor;
	int err = 0;
	int part;
	diskaddr_t p_blkcnt, p_blkst;

	minor = getminor(dev);
	if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))))
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n",
	    addr, blkno, nblk));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
	    NULL, NULL, NULL))
		return (ENXIO);

	if ((blkno + nblk) > p_blkcnt) {
		cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64,
		    blkno + nblk, (uint64_t)vdp->xdf_xdev_nblocks);
		return (EINVAL);
	}

	dbp = &dumpbuf;
	bioinit(dbp);
	dbp->b_flags = B_BUSY;
	dbp->b_un.b_addr = addr;
	dbp->b_bcount = nblk << DEV_BSHIFT;
	dbp->b_resid = 0;
	dbp->b_blkno = blkno;
	dbp->b_edev = dev;
	dbp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = dbp;
	} else {
		vdp->xdf_l_act->av_forw = dbp;
		vdp->xdf_l_act = dbp;
	}
	dbp->av_forw = NULL;
	dbp->av_back = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = xdf_drain_io(vdp);
	biofini(dbp);
	return (err);
}

/*ARGSUSED*/
static int
xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int instance;
	xdf_t *vdp;
	minor_t minor;
	int part;

	minor = getminor(dev);
	instance = XDF_INST(minor);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (ENXIO);

	DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n",
	    instance, cmd, cmd));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo media_info;

		media_info.dki_lbsize = DEV_BSIZE;
		media_info.dki_capacity = vdp->xdf_xdev_nblocks;
		media_info.dki_media_type = DK_FIXED_DISK;

		if (ddi_copyout(&media_info, (void *)arg,
		    sizeof (struct dk_minfo), mode)) {
			return (EFAULT);
		} else {
			return (0);
		}
	}

	case DKIOCINFO: {
		struct dk_cinfo info;

		/* controller information */
		if (XD_IS_CD(vdp))
			info.dki_ctype = DKC_CDROM;
		else
			info.dki_ctype = DKC_VBD;

		info.dki_cnum = 0;
		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);

		/* unit information */
		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
		info.dki_flags = DKI_FMTVOL;
		info.dki_partition = part;
		info.dki_maxtransfer = maxphys / DEV_BSIZE;
		info.dki_addr = 0;
		info.dki_space = 0;
		info.dki_prio = 0;
		info.dki_vec = 0;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
			return (EFAULT);
		else
			return (0);
	}

	case DKIOCSTATE: {
		enum dkio_state dkstate = DKIO_INSERTED;
		if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate),
		    mode) != 0)
			return (EFAULT);
		return (0);
	}

	/*
	 * is media removable?
	 */
	case DKIOCREMOVABLE: {
		int i = XD_IS_RM(vdp) ?
1 : 0; 1050 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1051 return (EFAULT); 1052 return (0); 1053 } 1054 1055 case DKIOCG_PHYGEOM: 1056 case DKIOCG_VIRTGEOM: 1057 case DKIOCGGEOM: 1058 case DKIOCSGEOM: 1059 case DKIOCGAPART: 1060 case DKIOCGVTOC: 1061 case DKIOCSVTOC: 1062 case DKIOCPARTINFO: 1063 case DKIOCGETEFI: 1064 case DKIOCSETEFI: 1065 case DKIOCPARTITION: { 1066 int rc; 1067 1068 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1069 rvalp, NULL); 1070 return (rc); 1071 } 1072 1073 case DKIOCGETWCE: 1074 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1075 sizeof (vdp->xdf_wce), mode)) 1076 return (EFAULT); 1077 return (0); 1078 case DKIOCSETWCE: 1079 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1080 sizeof (vdp->xdf_wce), mode)) 1081 return (EFAULT); 1082 return (0); 1083 case DKIOCFLUSHWRITECACHE: { 1084 int rc; 1085 struct dk_callback *dkc = (struct dk_callback *)arg; 1086 1087 if (vdp->xdf_flush_supported) { 1088 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1089 NULL, 0, 0, (void *)dev); 1090 } else { 1091 if (xdf_barrier_flush_disable) 1092 return (ENOTTY); 1093 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1094 vdp->xdf_cache_flush_block, xdf_flush_block, 1095 DEV_BSIZE, (void *)dev); 1096 } 1097 if ((mode & FKIOCTL) && (dkc != NULL) && 1098 (dkc->dkc_callback != NULL)) { 1099 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1100 /* need to return 0 after calling callback */ 1101 rc = 0; 1102 } 1103 return (rc); 1104 } 1105 1106 default: 1107 return (ENOTTY); 1108 } 1109 } 1110 1111 /* 1112 * xdf interrupt handler 1113 */ 1114 static uint_t 1115 xdf_intr(caddr_t arg) 1116 { 1117 xdf_t *vdp = (xdf_t *)arg; 1118 xendev_ring_t *xbr; 1119 blkif_response_t *resp; 1120 int bioerr = 0; 1121 uint64_t id; 1122 extern int do_polled_io; 1123 uint8_t op; 1124 uint16_t status; 1125 ddi_acc_handle_t acchdl; 1126 1127 mutex_enter(&vdp->xdf_dev_lk); 1128 1129 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1130 mutex_exit(&vdp->xdf_dev_lk); 1131 return (DDI_INTR_UNCLAIMED); 1132 } 1133 1134 acchdl = vdp->xdf_xb_ring_hdl; 1135 1136 /* 1137 * complete all requests which have a response 1138 */ 1139 while (resp = xvdi_ring_get_response(xbr)) { 1140 id = ddi_get64(acchdl, &resp->id); 1141 op = ddi_get8(acchdl, &resp->operation); 1142 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1143 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1144 op, id, status)); 1145 1146 /* 1147 * XXPV - close connection to the backend and restart 1148 */ 1149 if (status != BLKIF_RSP_OKAY) { 1150 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 1151 ddi_get_name_addr(vdp->xdf_dip), 1152 (op == BLKIF_OP_READ) ? 
"reading" : "writing")); 1153 bioerr = EIO; 1154 } 1155 1156 xdf_iofini(vdp, id, bioerr); 1157 } 1158 1159 mutex_exit(&vdp->xdf_dev_lk); 1160 1161 if (!do_polled_io) 1162 xdf_iostart(vdp); 1163 1164 return (DDI_INTR_CLAIMED); 1165 } 1166 1167 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1168 1169 /* 1170 * Snarf new data if our flush block was re-written 1171 */ 1172 static void 1173 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1174 { 1175 int nblks; 1176 boolean_t mapin; 1177 1178 if (IS_WRITE_BARRIER(vdp, bp)) 1179 return; /* write was a flush write */ 1180 1181 mapin = B_FALSE; 1182 nblks = bp->b_bcount >> DEV_BSHIFT; 1183 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1184 xdf_fbrewrites++; 1185 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1186 mapin = B_TRUE; 1187 bp_mapin(bp); 1188 } 1189 bcopy(bp->b_un.b_addr + 1190 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1191 vdp->xdf_cache_flush_block, DEV_BSIZE); 1192 if (mapin) 1193 bp_mapout(bp); 1194 } 1195 } 1196 1197 static void 1198 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1199 { 1200 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1201 v_req_t *vreq = gs->vreq; 1202 buf_t *bp = vreq->v_buf; 1203 1204 gs_free(vdp, gs); 1205 if (bioerr) 1206 bioerror(bp, bioerr); 1207 vreq->v_nslots--; 1208 if (vreq->v_nslots != 0) 1209 return; 1210 1211 XDF_UPDATE_IO_STAT(vdp, bp); 1212 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1213 1214 if (IS_ERROR(bp)) 1215 bp->b_resid = bp->b_bcount; 1216 1217 vreq_free(vdp, vreq); 1218 biodone(bp); 1219 } 1220 1221 /* 1222 * return value of xdf_prepare_rreq() 1223 * used in xdf_iostart() 1224 */ 1225 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1226 #define XF_COMP 1 /* no more I/O left in buf */ 1227 1228 static void 1229 xdf_iostart(xdf_t *vdp) 1230 { 1231 xendev_ring_t *xbr; 1232 struct buf *bp; 1233 blkif_request_t *rreq; 1234 int retval; 1235 int rreqready = 0; 1236 1237 xbr = vdp->xdf_xb_ring; 1238 1239 /* 1240 * populate the ring request(s) 1241 * 1242 * loop until there is no buf to transfer or no free slot 1243 * available in I/O ring 1244 */ 1245 for (;;) { 1246 mutex_enter(&vdp->xdf_dev_lk); 1247 1248 if (vdp->xdf_status != XD_READY) 1249 break; 1250 1251 /* active buf queue empty? 
*/ 1252 if ((bp = vdp->xdf_f_act) == NULL) 1253 break; 1254 1255 /* try to grab a vreq for this bp */ 1256 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1257 break; 1258 /* alloc DMA/GTE resources */ 1259 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1260 break; 1261 1262 /* get next blkif_request in the ring */ 1263 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1264 break; 1265 bzero(rreq, sizeof (blkif_request_t)); 1266 1267 /* populate blkif_request with this buf */ 1268 rreqready++; 1269 retval = xdf_prepare_rreq(vdp, bp, rreq); 1270 if (retval == XF_COMP) { 1271 /* finish this bp, switch to next one */ 1272 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1273 vdp->xdf_f_act = bp->av_forw; 1274 bp->av_forw = NULL; 1275 } 1276 1277 mutex_exit(&vdp->xdf_dev_lk); 1278 } 1279 1280 /* 1281 * Send the request(s) to the backend 1282 */ 1283 if (rreqready) { 1284 if (xvdi_ring_push_request(xbr)) { 1285 DPRINTF(IO_DBG, ("xdf_iostart: " 1286 "sent request(s) to backend\n")); 1287 xvdi_notify_oe(vdp->xdf_dip); 1288 } 1289 } 1290 1291 mutex_exit(&vdp->xdf_dev_lk); 1292 } 1293 1294 /* 1295 * populate a single blkif_request_t w/ a buf 1296 */ 1297 static int 1298 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1299 { 1300 int rval; 1301 grant_ref_t gr; 1302 uint8_t fsect, lsect; 1303 size_t bcnt; 1304 paddr_t dma_addr; 1305 off_t blk_off; 1306 dev_info_t *dip = vdp->xdf_dip; 1307 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1308 v_req_t *vreq = BP2VREQ(bp); 1309 uint64_t blkno = vreq->v_blkno; 1310 uint_t ndmacs = vreq->v_ndmacs; 1311 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1312 int seg = 0; 1313 int isread = IS_READ(bp); 1314 1315 if (isread) 1316 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1317 else { 1318 switch (vreq->v_flush_diskcache) { 1319 case FLUSH_DISKCACHE: 1320 ddi_put8(acchdl, &rreq->operation, 1321 BLKIF_OP_FLUSH_DISKCACHE); 1322 ddi_put16(acchdl, &rreq->handle, vdev); 1323 ddi_put64(acchdl, &rreq->id, 1324 (uint64_t)(uintptr_t)(vreq->v_gs)); 1325 ddi_put8(acchdl, &rreq->nr_segments, 0); 1326 return (XF_COMP); 1327 case WRITE_BARRIER: 1328 ddi_put8(acchdl, &rreq->operation, 1329 BLKIF_OP_WRITE_BARRIER); 1330 break; 1331 default: 1332 if (!vdp->xdf_wce) 1333 ddi_put8(acchdl, &rreq->operation, 1334 BLKIF_OP_WRITE_BARRIER); 1335 else 1336 ddi_put8(acchdl, &rreq->operation, 1337 BLKIF_OP_WRITE); 1338 break; 1339 } 1340 } 1341 1342 ddi_put16(acchdl, &rreq->handle, vdev); 1343 ddi_put64(acchdl, &rreq->sector_number, blkno); 1344 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1345 1346 /* 1347 * loop until all segments are populated or no more dma cookie in buf 1348 */ 1349 for (;;) { 1350 /* 1351 * Each segment of a blkif request can transfer up to 1352 * one 4K page of data. 
1353 */ 1354 bcnt = vreq->v_dmac.dmac_size; 1355 ASSERT(bcnt <= PAGESIZE); 1356 ASSERT((bcnt % XB_BSIZE) == 0); 1357 dma_addr = vreq->v_dmac.dmac_laddress; 1358 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1359 ASSERT((blk_off & XB_BMASK) == 0); 1360 fsect = blk_off >> XB_BSHIFT; 1361 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1362 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1363 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1364 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1365 seg, vreq->v_dmac.dmac_size, blk_off)); 1366 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1367 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1368 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1369 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1370 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1371 "\n", seg, fsect, lsect, gr, dma_addr)); 1372 1373 blkno += (bcnt >> XB_BSHIFT); 1374 seg++; 1375 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1376 if (--ndmacs) { 1377 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1378 continue; 1379 } 1380 1381 vreq->v_status = VREQ_DMAWIN_DONE; 1382 vreq->v_blkno = blkno; 1383 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1384 /* last win */ 1385 rval = XF_COMP; 1386 else 1387 rval = XF_PARTIAL; 1388 break; 1389 } 1390 ddi_put8(acchdl, &rreq->nr_segments, seg); 1391 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1392 rreq->id)); 1393 1394 return (rval); 1395 } 1396 1397 #define XDF_QSEC 50000 /* .005 second */ 1398 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1399 1400 static int 1401 xdf_drain_io(xdf_t *vdp) 1402 { 1403 int pollc, rval; 1404 xendev_ring_t *xbr; 1405 1406 if (xdfdebug & SUSRES_DBG) 1407 xen_printf("xdf_drain_io: start\n"); 1408 1409 mutex_enter(&vdp->xdf_dev_lk); 1410 1411 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1412 goto out; 1413 1414 rval = 0; 1415 xbr = vdp->xdf_xb_ring; 1416 ASSERT(xbr != NULL); 1417 1418 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1419 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1420 mutex_exit(&vdp->xdf_dev_lk); 1421 (void) xdf_intr((caddr_t)vdp); 1422 mutex_enter(&vdp->xdf_dev_lk); 1423 } 1424 if (!xvdi_ring_has_incomp_request(xbr)) 1425 goto out; 1426 1427 (void) HYPERVISOR_yield(); 1428 /* 1429 * file-backed devices can be slow 1430 */ 1431 drv_usecwait(XDF_QSEC << pollc); 1432 } 1433 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1434 rval = EIO; 1435 out: 1436 mutex_exit(&vdp->xdf_dev_lk); 1437 if (xdfdebug & SUSRES_DBG) 1438 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1439 return (rval); 1440 } 1441 1442 /* ARGSUSED5 */ 1443 static int 1444 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1445 diskaddr_t start, size_t reqlen, void *tg_cookie) 1446 { 1447 xdf_t *vdp; 1448 struct buf *bp; 1449 int err = 0; 1450 1451 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1452 if (vdp == NULL) 1453 return (ENXIO); 1454 1455 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_xdev_nblocks) 1456 return (EINVAL); 1457 1458 bp = getrbuf(KM_SLEEP); 1459 if (cmd == TG_READ) 1460 bp->b_flags = B_BUSY | B_READ; 1461 else 1462 bp->b_flags = B_BUSY | B_WRITE; 1463 bp->b_un.b_addr = bufp; 1464 bp->b_bcount = reqlen; 1465 bp->b_resid = 0; 1466 bp->b_blkno = start; 1467 bp->av_forw = NULL; 1468 bp->av_back = NULL; 1469 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1470 1471 mutex_enter(&vdp->xdf_dev_lk); 1472 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1473 if (vdp->xdf_f_act == NULL) { 1474 vdp->xdf_f_act = 
vdp->xdf_l_act = bp; 1475 } else { 1476 vdp->xdf_l_act->av_forw = bp; 1477 vdp->xdf_l_act = bp; 1478 } 1479 mutex_exit(&vdp->xdf_dev_lk); 1480 xdf_iostart(vdp); 1481 err = biowait(bp); 1482 1483 ASSERT(bp->b_flags & B_DONE); 1484 1485 freerbuf(bp); 1486 return (err); 1487 } 1488 1489 /* 1490 * synthetic geometry 1491 */ 1492 #define XDF_NSECTS 256 1493 #define XDF_NHEADS 16 1494 1495 static int 1496 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1497 { 1498 xdf_t *vdp; 1499 1500 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1501 1502 if (vdp == NULL) 1503 return (ENXIO); 1504 1505 mutex_enter(&vdp->xdf_dev_lk); 1506 *capp = vdp->xdf_xdev_nblocks; 1507 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1508 mutex_exit(&vdp->xdf_dev_lk); 1509 return (0); 1510 } 1511 1512 static int 1513 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1514 { 1515 xdf_t *vdp; 1516 uint_t ncyl; 1517 uint_t spc = XDF_NHEADS * XDF_NSECTS; 1518 1519 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1520 1521 if (vdp == NULL) 1522 return (ENXIO); 1523 1524 ncyl = vdp->xdf_xdev_nblocks / spc; 1525 1526 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1527 geomp->g_acyl = 0; 1528 geomp->g_nhead = XDF_NHEADS; 1529 geomp->g_secsize = XB_BSIZE; 1530 geomp->g_nsect = XDF_NSECTS; 1531 geomp->g_intrlv = 0; 1532 geomp->g_rpm = 7200; 1533 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1534 return (0); 1535 } 1536 1537 /* 1538 * No real HBA, no geometry available from it 1539 */ 1540 /*ARGSUSED*/ 1541 static int 1542 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1543 { 1544 return (EINVAL); 1545 } 1546 1547 static int 1548 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1549 { 1550 xdf_t *vdp; 1551 1552 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1553 return (ENXIO); 1554 1555 if (XD_IS_RO(vdp)) 1556 tgattributep->media_is_writable = 0; 1557 else 1558 tgattributep->media_is_writable = 1; 1559 return (0); 1560 } 1561 1562 /* ARGSUSED3 */ 1563 static int 1564 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1565 { 1566 switch (cmd) { 1567 case TG_GETPHYGEOM: 1568 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1569 case TG_GETVIRTGEOM: 1570 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1571 case TG_GETCAPACITY: 1572 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1573 case TG_GETBLOCKSIZE: 1574 *(uint32_t *)arg = XB_BSIZE; 1575 return (0); 1576 case TG_GETATTR: 1577 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1578 default: 1579 return (ENOTTY); 1580 } 1581 } 1582 1583 /* 1584 * Kick-off connect process 1585 * Status should be XD_UNKNOWN or XD_CLOSED 1586 * On success, status will be changed to XD_INIT 1587 * On error, status won't be changed 1588 */ 1589 static int 1590 xdf_start_connect(xdf_t *vdp) 1591 { 1592 char *xsnode; 1593 grant_ref_t gref; 1594 xenbus_transaction_t xbt; 1595 int rv; 1596 dev_info_t *dip = vdp->xdf_dip; 1597 1598 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1599 goto errout; 1600 1601 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1602 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1603 ddi_get_name_addr(dip)); 1604 goto errout; 1605 } 1606 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1607 DDI_SUCCESS) { 1608 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1609 "failed to add intr handler", ddi_get_name_addr(dip)); 1610 goto errout1; 1611 } 1612 1613 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1614 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1615 
DDI_SUCCESS) { 1616 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1617 ddi_get_name_addr(dip)); 1618 goto errout2; 1619 } 1620 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1621 1622 /* 1623 * Write into xenstore the info needed by backend 1624 */ 1625 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1626 cmn_err(CE_WARN, "xdf@%s: " 1627 "failed to get xenstore node path", 1628 ddi_get_name_addr(dip)); 1629 goto fail_trans; 1630 } 1631 trans_retry: 1632 if (xenbus_transaction_start(&xbt)) { 1633 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1634 ddi_get_name_addr(dip)); 1635 xvdi_fatal_error(dip, EIO, "transaction start"); 1636 goto fail_trans; 1637 } 1638 1639 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1640 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1641 ddi_get_name_addr(dip)); 1642 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1643 goto abort_trans; 1644 } 1645 1646 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1647 xvdi_get_evtchn(dip))) { 1648 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1649 ddi_get_name_addr(dip)); 1650 xvdi_fatal_error(dip, rv, "writing event-channel"); 1651 goto abort_trans; 1652 } 1653 1654 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1655 cmn_err(CE_WARN, "xdf@%s: " 1656 "failed to switch state to XenbusStateInitialised", 1657 ddi_get_name_addr(dip)); 1658 xvdi_fatal_error(dip, rv, "writing state"); 1659 goto abort_trans; 1660 } 1661 1662 /* kick-off connect process */ 1663 if (rv = xenbus_transaction_end(xbt, 0)) { 1664 if (rv == EAGAIN) 1665 goto trans_retry; 1666 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1667 ddi_get_name_addr(dip)); 1668 xvdi_fatal_error(dip, rv, "completing transaction"); 1669 goto fail_trans; 1670 } 1671 1672 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1673 mutex_enter(&vdp->xdf_dev_lk); 1674 vdp->xdf_status = XD_INIT; 1675 mutex_exit(&vdp->xdf_dev_lk); 1676 1677 return (DDI_SUCCESS); 1678 1679 abort_trans: 1680 (void) xenbus_transaction_end(xbt, 1); 1681 fail_trans: 1682 xvdi_free_ring(vdp->xdf_xb_ring); 1683 errout2: 1684 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1685 errout1: 1686 xvdi_free_evtchn(dip); 1687 errout: 1688 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1689 ddi_get_name_addr(dip)); 1690 return (DDI_FAILURE); 1691 } 1692 1693 /* 1694 * Kick-off disconnect process 1695 * Status won't be changed 1696 */ 1697 static int 1698 xdf_start_disconnect(xdf_t *vdp) 1699 { 1700 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1701 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1702 ddi_get_name_addr(vdp->xdf_dip)); 1703 return (DDI_FAILURE); 1704 } 1705 1706 return (DDI_SUCCESS); 1707 } 1708 1709 int 1710 xdf_get_flush_block(xdf_t *vdp) 1711 { 1712 /* 1713 * Get a DEV_BSIZE aligned bufer 1714 */ 1715 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1716 vdp->xdf_cache_flush_block = 1717 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1718 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1719 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1720 return (DDI_FAILURE); 1721 return (DDI_SUCCESS); 1722 } 1723 1724 /* 1725 * Finish other initialization after we've connected to backend 1726 * Status should be XD_INIT before calling this routine 1727 * On success, status should be changed to XD_READY 1728 * On error, status should stay XD_INIT 1729 */ 1730 static int 1731 xdf_post_connect(xdf_t *vdp) 1732 { 1733 int rv; 1734 uint_t len; 
1735 char *type; 1736 char *barrier; 1737 dev_info_t *devi = vdp->xdf_dip; 1738 1739 /* 1740 * Determine if feature barrier is supported by backend 1741 */ 1742 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1743 "feature-barrier", (void **)&barrier, &len) == 0) { 1744 vdp->xdf_feature_barrier = 1; 1745 kmem_free(barrier, len); 1746 } else { 1747 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1748 ddi_get_name_addr(vdp->xdf_dip)); 1749 vdp->xdf_feature_barrier = 0; 1750 } 1751 1752 /* probe backend */ 1753 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1754 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1755 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1756 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1757 "cannot read backend info", ddi_get_name_addr(devi)); 1758 xvdi_fatal_error(devi, rv, "reading backend info"); 1759 return (DDI_FAILURE); 1760 } 1761 1762 /* fix disk type */ 1763 if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", 1764 (void **)&type, &len) != 0) { 1765 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1766 "cannot read device-type", ddi_get_name_addr(devi)); 1767 xvdi_fatal_error(devi, rv, "reading device-type"); 1768 return (DDI_FAILURE); 1769 } 1770 if (strcmp(type, "cdrom") == 0) 1771 vdp->xdf_xdev_info |= VDISK_CDROM; 1772 kmem_free(type, len); 1773 1774 /* 1775 * We've created all the minor nodes via cmlb_attach() using default 1776 * value in xdf_attach() to make it possbile to block in xdf_open(), 1777 * in case there's anyone (say, booting thread) ever trying to open 1778 * it before connected to backend. We will refresh all those minor 1779 * nodes w/ latest info we've got now when we are almost connected. 1780 * 1781 * Don't do this when xdf is already opened by someone (could happen 1782 * during resume), for that cmlb_attach() will invalid the label info 1783 * and confuse those who has already opened the node, which is bad. 1784 */ 1785 if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { 1786 /* re-init cmlb w/ latest info we got from backend */ 1787 if (cmlb_attach(devi, &xdf_lb_ops, 1788 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 1789 XD_IS_RM(vdp), 1, DDI_NT_BLOCK, 1790 CMLB_FAKE_LABEL_ONE_PARTITION, 1791 vdp->xdf_vd_lbl, NULL) != 0) { 1792 cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", 1793 ddi_get_name_addr(devi)); 1794 return (DDI_FAILURE); 1795 } 1796 } 1797 1798 /* mark vbd is ready for I/O */ 1799 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1800 mutex_enter(&vdp->xdf_dev_lk); 1801 vdp->xdf_status = XD_READY; 1802 mutex_exit(&vdp->xdf_dev_lk); 1803 /* 1804 * If backend has feature-barrier, see if it supports disk 1805 * cache flush op. 1806 */ 1807 vdp->xdf_flush_supported = 0; 1808 if (vdp->xdf_feature_barrier) { 1809 /* 1810 * Pretend we already know flush is supported so probe 1811 * will attempt the correct op. 1812 */ 1813 vdp->xdf_flush_supported = 1; 1814 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1815 vdp->xdf_flush_supported = 1; 1816 } else { 1817 vdp->xdf_flush_supported = 0; 1818 /* 1819 * If the other end does not support the cache flush op 1820 * then we must use a barrier-write to force disk 1821 * cache flushing. Barrier writes require that a data 1822 * block actually be written. 1823 * Cache a block to barrier-write when we are 1824 * asked to perform a flush. 1825 * XXX - would it be better to just copy 1 block 1826 * (512 bytes) from whatever write we did last 1827 * and rewrite that block? 
1828 */ 1829 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1830 return (DDI_FAILURE); 1831 } 1832 } 1833 1834 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 1835 (uint64_t)vdp->xdf_xdev_nblocks); 1836 1837 return (DDI_SUCCESS); 1838 } 1839 1840 /* 1841 * Finish other uninitialization after we've disconnected from backend 1842 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 1843 */ 1844 static void 1845 xdf_post_disconnect(xdf_t *vdp) 1846 { 1847 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1848 xvdi_free_evtchn(vdp->xdf_dip); 1849 xvdi_free_ring(vdp->xdf_xb_ring); 1850 vdp->xdf_xb_ring = NULL; 1851 vdp->xdf_xb_ring_hdl = NULL; 1852 vdp->xdf_peer = (domid_t)-1; 1853 1854 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1855 mutex_enter(&vdp->xdf_dev_lk); 1856 vdp->xdf_status = XD_CLOSED; 1857 mutex_exit(&vdp->xdf_dev_lk); 1858 } 1859 1860 /*ARGSUSED*/ 1861 static void 1862 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1863 { 1864 XenbusState new_state = *(XenbusState *)impl_data; 1865 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1866 boolean_t unexpect_die = B_FALSE; 1867 int status; 1868 1869 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1870 ddi_get_name_addr(dip), new_state)); 1871 1872 mutex_enter(&vdp->xdf_cb_lk); 1873 1874 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 1875 mutex_exit(&vdp->xdf_cb_lk); 1876 return; 1877 } 1878 1879 switch (new_state) { 1880 case XenbusStateInitialising: 1881 ASSERT(vdp->xdf_status == XD_CLOSED); 1882 /* 1883 * backend recovered from a previous failure, 1884 * kick-off connect process again 1885 */ 1886 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 1887 cmn_err(CE_WARN, "xdf@%s:" 1888 " failed to start reconnecting to backend", 1889 ddi_get_name_addr(dip)); 1890 } 1891 break; 1892 case XenbusStateConnected: 1893 ASSERT(vdp->xdf_status == XD_INIT); 1894 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1895 /* finish final init after connect */ 1896 if (xdf_post_connect(vdp) != DDI_SUCCESS) 1897 (void) xdf_start_disconnect(vdp); 1898 break; 1899 case XenbusStateClosing: 1900 if (vdp->xdf_status == XD_READY) { 1901 mutex_enter(&vdp->xdf_dev_lk); 1902 if (xdf_isopen(vdp, -1)) { 1903 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 1904 "still in use", ddi_get_name_addr(dip)); 1905 mutex_exit(&vdp->xdf_dev_lk); 1906 break; 1907 } else { 1908 vdp->xdf_status = XD_CLOSING; 1909 } 1910 mutex_exit(&vdp->xdf_dev_lk); 1911 } 1912 (void) xdf_start_disconnect(vdp); 1913 break; 1914 case XenbusStateClosed: 1915 /* first check if BE closed unexpectedly */ 1916 mutex_enter(&vdp->xdf_dev_lk); 1917 if (xdf_isopen(vdp, -1)) { 1918 unexpect_die = B_TRUE; 1919 unexpectedie(vdp); 1920 cmn_err(CE_WARN, "xdf@%s: backend closed, " 1921 "reconnecting...", ddi_get_name_addr(dip)); 1922 } 1923 mutex_exit(&vdp->xdf_dev_lk); 1924 1925 if (vdp->xdf_status == XD_READY) { 1926 mutex_enter(&vdp->xdf_dev_lk); 1927 vdp->xdf_status = XD_CLOSING; 1928 mutex_exit(&vdp->xdf_dev_lk); 1929 1930 #ifdef DOMU_BACKEND 1931 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1932 #endif 1933 1934 xdf_post_disconnect(vdp); 1935 (void) xvdi_switch_state(dip, XBT_NULL, 1936 XenbusStateClosed); 1937 } else if ((vdp->xdf_status == XD_INIT) || 1938 (vdp->xdf_status == XD_CLOSING)) { 1939 xdf_post_disconnect(vdp); 1940 } else { 1941 mutex_enter(&vdp->xdf_dev_lk); 1942 vdp->xdf_status = XD_CLOSED; 1943 mutex_exit(&vdp->xdf_dev_lk); 1944 } 1945 } 1946 1947 /* notify anybody waiting for oe 
state change */ 1948 mutex_enter(&vdp->xdf_dev_lk); 1949 cv_broadcast(&vdp->xdf_dev_cv); 1950 mutex_exit(&vdp->xdf_dev_lk); 1951 1952 status = vdp->xdf_status; 1953 mutex_exit(&vdp->xdf_cb_lk); 1954 1955 if (status == XD_READY) { 1956 xdf_iostart(vdp); 1957 } else if ((status == XD_CLOSED) && !unexpect_die) { 1958 /* interface is closed successfully, remove all minor nodes */ 1959 cmlb_detach(vdp->xdf_vd_lbl, NULL); 1960 cmlb_free_handle(&vdp->xdf_vd_lbl); 1961 } 1962 } 1963 1964 /* check if partition is open, -1 - check all partitions on the disk */ 1965 static boolean_t 1966 xdf_isopen(xdf_t *vdp, int partition) 1967 { 1968 int i; 1969 ulong_t parbit; 1970 boolean_t rval = B_FALSE; 1971 1972 if (partition == -1) 1973 parbit = (ulong_t)-1; 1974 else 1975 parbit = 1 << partition; 1976 1977 for (i = 0; i < OTYPCNT; i++) { 1978 if (vdp->xdf_vd_open[i] & parbit) 1979 rval = B_TRUE; 1980 } 1981 1982 return (rval); 1983 } 1984 1985 /* 1986 * Xdf_check_state_transition will check the XenbusState change to see 1987 * if the change is a valid transition or not. 1988 * The new state is written by backend domain, or by running xenstore-write 1989 * to change it manually in dom0 1990 */ 1991 static int 1992 xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) 1993 { 1994 int status; 1995 int stcheck; 1996 #define STOK 0 /* need further process */ 1997 #define STNOP 1 /* no action need taking */ 1998 #define STBUG 2 /* unexpected state change, could be a bug */ 1999 2000 status = vdp->xdf_status; 2001 stcheck = STOK; 2002 2003 switch (status) { 2004 case XD_UNKNOWN: 2005 if ((oestate == XenbusStateUnknown) || 2006 (oestate == XenbusStateConnected)) 2007 stcheck = STBUG; 2008 else if ((oestate == XenbusStateInitialising) || 2009 (oestate == XenbusStateInitWait) || 2010 (oestate == XenbusStateInitialised)) 2011 stcheck = STNOP; 2012 break; 2013 case XD_INIT: 2014 if (oestate == XenbusStateUnknown) 2015 stcheck = STBUG; 2016 else if ((oestate == XenbusStateInitialising) || 2017 (oestate == XenbusStateInitWait) || 2018 (oestate == XenbusStateInitialised)) 2019 stcheck = STNOP; 2020 break; 2021 case XD_READY: 2022 if ((oestate == XenbusStateUnknown) || 2023 (oestate == XenbusStateInitialising) || 2024 (oestate == XenbusStateInitWait) || 2025 (oestate == XenbusStateInitialised)) 2026 stcheck = STBUG; 2027 else if (oestate == XenbusStateConnected) 2028 stcheck = STNOP; 2029 break; 2030 case XD_CLOSING: 2031 if ((oestate == XenbusStateUnknown) || 2032 (oestate == XenbusStateInitialising) || 2033 (oestate == XenbusStateInitWait) || 2034 (oestate == XenbusStateInitialised) || 2035 (oestate == XenbusStateConnected)) 2036 stcheck = STBUG; 2037 else if (oestate == XenbusStateClosing) 2038 stcheck = STNOP; 2039 break; 2040 case XD_CLOSED: 2041 if ((oestate == XenbusStateUnknown) || 2042 (oestate == XenbusStateConnected)) 2043 stcheck = STBUG; 2044 else if ((oestate == XenbusStateInitWait) || 2045 (oestate == XenbusStateInitialised) || 2046 (oestate == XenbusStateClosing) || 2047 (oestate == XenbusStateClosed)) 2048 stcheck = STNOP; 2049 break; 2050 case XD_SUSPEND: 2051 default: 2052 stcheck = STBUG; 2053 } 2054 2055 if (stcheck == STOK) 2056 return (DDI_SUCCESS); 2057 2058 if (stcheck == STBUG) 2059 cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " 2060 "state change to %d!, when status is %d", 2061 ddi_get_name_addr(vdp->xdf_dip), oestate, status); 2062 2063 return (DDI_FAILURE); 2064 } 2065 2066 static int 2067 xdf_connect(xdf_t *vdp, boolean_t wait) 2068 { 2069 ASSERT(mutex_owned(&vdp->xdf_dev_lk)); 2070 
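	/*
	 * Wait for the connection to reach XD_READY.  Give up right away if
	 * the caller did not ask to wait or the status has already moved
	 * past XD_READY, and stop waiting if cv_wait_sig() is interrupted
	 * by a signal; the caller is expected to check the returned status.
	 */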
	while (vdp->xdf_status != XD_READY) {
		if (!wait || (vdp->xdf_status > XD_READY))
			break;

		if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
			break;
	}

	return (vdp->xdf_status);
}

/*
 * Callback function invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	ASSERT(vdp != NULL);

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    ddi_get_name_addr(vdp->xdf_dip)));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	ASSERT(vdp != NULL);

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(ISDMACBON(vdp));
	SETDMACBOFF(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* a new timeout could now be rescheduled */
	xdf_iostart(vdp);
}

/*
 * Allocate a vreq for this bp.
 * Upon return, bp->av_back points to the vreq.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP2VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
	bp->av_back = (buf_t *)vreq;
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	/* initialization of the other vreq fields is up to the caller */

	return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t *bp = vreq->v_buf;

	list_remove(&vdp->xdf_vreq_act, (void *)vreq);

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
	vreq->v_buf->av_back = NULL;
	kmem_cache_free(xdf_vreq_cache, vreq);
}

/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
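	/*
	 * Transfer direction plus DDI_DMA_STREAMING for sequential access,
	 * and DDI_DMA_PARTIAL so a buffer that exceeds the DMA attributes
	 * may be bound as multiple DMA windows (walked below with
	 * ddi_dma_numwin()/ddi_dma_getwin()).
	 */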
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, (
				    "xdf@%s: get ge_slot failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_gs = gs;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			gs->vreq = vreq;
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		bp->b_private = NULL;
		/* see if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work around CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
				    "handle alloc failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);

			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, (
				    "xdf@%s: DMA mem allocation failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get the number of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl,
				    &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get a ge_slot; on failure gs_get() sets up the callback,
		 * if it is not already set
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_dmaw++;
		rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs);
		ASSERT(rc == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* first, try to allocate the GTEs needed for this slot */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init the ge_slot */
	list_insert_head(&vdp->xdf_gs_act, (void *)gs);
	gs->oeid = vdp->xdf_peer;
	gs->isread = isread;
	gs->ghead = gh;
	gs->ngrefs = 0;

	return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
	int i;
	grant_ref_t *gp = gs->ge;
	int ngrefs = gs->ngrefs;
	boolean_t isread = gs->isread;

	list_remove(&vdp->xdf_gs_act, (void *)gs);

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < ngrefs; i++, gp++)
		gnttab_end_foreign_access(*gp, !isread, 0);
	gnttab_free_grant_references(gs->ghead);

	kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

	ASSERT(gr != -1);
	ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->ge[gs->ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

	return (gr);
}

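/*
 * The backend went to XenbusStateClosed while the device was still open.
 * Retire any responses already on the ring, release all grant table
 * entries, and move the outstanding bufs back onto the driver's active
 * list so the I/O can be reissued once the connection is re-established.
 */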
static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in the ring that have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * move each bp back to the active list in order;
	 * vreq_busy is updated in vreq_free()
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of the list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

/* clamp the transfer size of a buf to xdf_maxphys */
static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}