1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * xdf.c - Xen Virtual Block Device Driver 29 * TODO: 30 * - support alternate block size (currently only DEV_BSIZE supported) 31 * - revalidate geometry for removable devices 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/types.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/dditypes.h> 40 #include <sys/sunddi.h> 41 #include <sys/list.h> 42 #include <sys/cmlb.h> 43 #include <sys/dkio.h> 44 #include <sys/vtoc.h> 45 #include <sys/modctl.h> 46 #include <sys/bootconf.h> 47 #include <sys/promif.h> 48 #include <sys/sysmacros.h> 49 #include <sys/kstat.h> 50 #include <sys/mach_mmu.h> 51 #ifdef XPV_HVM_DRIVER 52 #include <sys/xpv_support.h> 53 #endif 54 #include <public/io/xenbus.h> 55 #include <xen/sys/xenbus_impl.h> 56 #include <xen/sys/xendev.h> 57 #include <sys/gnttab.h> 58 #include <sys/scsi/generic/inquiry.h> 59 #include <xen/io/blkif_impl.h> 60 #include <io/xdf.h> 61 62 #define FLUSH_DISKCACHE 0x1 63 #define WRITE_BARRIER 0x2 64 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 65 #define USE_WRITE_BARRIER(vdp) \ 66 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 67 #define USE_FLUSH_DISKCACHE(vdp) \ 68 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 69 #define IS_WRITE_BARRIER(vdp, bp) \ 70 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 71 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 72 #define IS_FLUSH_DISKCACHE(bp) \ 73 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 74 75 static void *vbd_ss; 76 static kmem_cache_t *xdf_vreq_cache; 77 static kmem_cache_t *xdf_gs_cache; 78 static int xdf_maxphys = XB_MAXPHYS; 79 int xdfdebug = 0; 80 extern int do_polled_io; 81 diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 82 int xdf_barrier_flush_disable = 0; 83 84 /* 85 * dev_ops and cb_ops entrypoints 86 */ 87 static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 88 static int xdf_attach(dev_info_t *, ddi_attach_cmd_t); 89 static int xdf_detach(dev_info_t *, ddi_detach_cmd_t); 90 static int xdf_reset(dev_info_t *, ddi_reset_cmd_t); 91 static int xdf_open(dev_t *, int, int, cred_t *); 92 static int xdf_close(dev_t, int, int, struct cred *); 93 static int xdf_strategy(struct buf *); 94 static int xdf_read(dev_t, struct uio *, cred_t *); 95 static int xdf_aread(dev_t, struct aio_req *, cred_t *); 96 static int xdf_write(dev_t, struct uio *, cred_t *); 97 static int xdf_awrite(dev_t, struct aio_req *, cred_t *); 98 static int xdf_dump(dev_t, caddr_t, daddr_t, int); 99 static int xdf_ioctl(dev_t, int, intptr_t, 
int, cred_t *, int *); 100 static uint_t xdf_intr(caddr_t); 101 static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *, 102 caddr_t, int *); 103 104 /* 105 * misc private functions 106 */ 107 static int xdf_suspend(dev_info_t *); 108 static int xdf_resume(dev_info_t *); 109 static int xdf_start_connect(xdf_t *); 110 static int xdf_start_disconnect(xdf_t *); 111 static int xdf_post_connect(xdf_t *); 112 static void xdf_post_disconnect(xdf_t *); 113 static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); 114 static void xdf_iostart(xdf_t *); 115 static void xdf_iofini(xdf_t *, uint64_t, int); 116 static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *); 117 static int xdf_drain_io(xdf_t *); 118 static boolean_t xdf_isopen(xdf_t *, int); 119 static int xdf_check_state_transition(xdf_t *, XenbusState); 120 static int xdf_connect(xdf_t *, boolean_t); 121 static int xdf_dmacallback(caddr_t); 122 static void xdf_timeout_handler(void *); 123 static uint_t xdf_iorestart(caddr_t); 124 static v_req_t *vreq_get(xdf_t *, buf_t *); 125 static void vreq_free(xdf_t *, v_req_t *); 126 static int vreq_setup(xdf_t *, v_req_t *); 127 static ge_slot_t *gs_get(xdf_t *, int); 128 static void gs_free(xdf_t *, ge_slot_t *); 129 static grant_ref_t gs_grant(ge_slot_t *, mfn_t); 130 static void unexpectedie(xdf_t *); 131 static void xdfmin(struct buf *); 132 133 static struct cb_ops xdf_cbops = { 134 xdf_open, 135 xdf_close, 136 xdf_strategy, 137 nodev, 138 xdf_dump, 139 xdf_read, 140 xdf_write, 141 xdf_ioctl, 142 nodev, 143 nodev, 144 nodev, 145 nochpoll, 146 xdf_prop_op, 147 NULL, 148 D_MP | D_NEW | D_64BIT, 149 CB_REV, 150 xdf_aread, 151 xdf_awrite 152 }; 153 154 struct dev_ops xdf_devops = { 155 DEVO_REV, /* devo_rev */ 156 0, /* devo_refcnt */ 157 xdf_getinfo, /* devo_getinfo */ 158 nulldev, /* devo_identify */ 159 nulldev, /* devo_probe */ 160 xdf_attach, /* devo_attach */ 161 xdf_detach, /* devo_detach */ 162 xdf_reset, /* devo_reset */ 163 &xdf_cbops, /* devo_cb_ops */ 164 (struct bus_ops *)NULL /* devo_bus_ops */ 165 }; 166 167 static struct modldrv modldrv = { 168 &mod_driverops, /* Type of module. 
This one is a driver */ 169 "virtual block driver %I%", /* short description */ 170 &xdf_devops /* driver specific ops */ 171 }; 172 173 static struct modlinkage xdf_modlinkage = { 174 MODREV_1, (void *)&modldrv, NULL 175 }; 176 177 /* 178 * I/O buffer DMA attributes 179 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 180 */ 181 static ddi_dma_attr_t xb_dma_attr = { 182 DMA_ATTR_V0, 183 (uint64_t)0, /* lowest address */ 184 (uint64_t)0xffffffffffffffff, /* highest usable address */ 185 (uint64_t)0xffffff, /* DMA counter limit max */ 186 (uint64_t)XB_BSIZE, /* alignment in bytes */ 187 XB_BSIZE - 1, /* bitmap of burst sizes */ 188 XB_BSIZE, /* min transfer */ 189 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 190 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 191 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 192 XB_BSIZE, /* granularity */ 193 0, /* flags (reserved) */ 194 }; 195 196 static ddi_device_acc_attr_t xc_acc_attr = { 197 DDI_DEVICE_ATTR_V0, 198 DDI_NEVERSWAP_ACC, 199 DDI_STRICTORDER_ACC 200 }; 201 202 /* callbacks from commmon label */ 203 204 static int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 205 void *); 206 static int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 207 208 static cmlb_tg_ops_t xdf_lb_ops = { 209 TG_DK_OPS_VERSION_1, 210 xdf_lb_rdwr, 211 xdf_lb_getinfo 212 }; 213 214 int 215 _init(void) 216 { 217 int rc; 218 219 if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) == 0) { 220 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 221 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 222 ASSERT(xdf_vreq_cache != NULL); 223 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 224 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 225 ASSERT(xdf_gs_cache != NULL); 226 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 227 kmem_cache_destroy(xdf_vreq_cache); 228 kmem_cache_destroy(xdf_gs_cache); 229 ddi_soft_state_fini(&vbd_ss); 230 } 231 } 232 233 return (rc); 234 } 235 236 int 237 _fini(void) 238 { 239 int err; 240 241 if ((err = mod_remove(&xdf_modlinkage)) != 0) 242 return (err); 243 244 kmem_cache_destroy(xdf_vreq_cache); 245 kmem_cache_destroy(xdf_gs_cache); 246 ddi_soft_state_fini(&vbd_ss); 247 248 return (0); 249 } 250 251 int 252 _info(struct modinfo *modinfop) 253 { 254 return (mod_info(&xdf_modlinkage, modinfop)); 255 } 256 257 /*ARGSUSED*/ 258 static int 259 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 260 { 261 int instance; 262 xdf_t *vbdp; 263 264 instance = XDF_INST(getminor((dev_t)arg)); 265 266 switch (cmd) { 267 case DDI_INFO_DEVT2DEVINFO: 268 if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { 269 *rp = NULL; 270 return (DDI_FAILURE); 271 } 272 *rp = vbdp->xdf_dip; 273 return (DDI_SUCCESS); 274 275 case DDI_INFO_DEVT2INSTANCE: 276 *rp = (void *)(uintptr_t)instance; 277 return (DDI_SUCCESS); 278 279 default: 280 return (DDI_FAILURE); 281 } 282 } 283 284 static int 285 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 286 char *name, caddr_t valuep, int *lengthp) 287 { 288 int instance = ddi_get_instance(dip); 289 xdf_t *vdp; 290 diskaddr_t p_blkcnt; 291 292 /* 293 * xdf dynamic properties are device specific and size oriented. 294 * Requests issued under conditions where size is valid are passed 295 * to ddi_prop_op_nblocks with the size information, otherwise the 296 * request is passed to ddi_prop_op. 
297 */ 298 vdp = ddi_get_soft_state(vbd_ss, instance); 299 300 if ((dev == DDI_DEV_T_ANY) || (vdp == NULL)) 301 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 302 name, valuep, lengthp)); 303 304 /* do cv_wait until connected or failed */ 305 mutex_enter(&vdp->xdf_dev_lk); 306 if (xdf_connect(vdp, B_TRUE) != XD_READY) { 307 mutex_exit(&vdp->xdf_dev_lk); 308 goto out; 309 } 310 mutex_exit(&vdp->xdf_dev_lk); 311 312 if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt, 313 NULL, NULL, NULL, NULL) == 0) 314 return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 315 name, valuep, lengthp, (uint64_t)p_blkcnt)); 316 317 out: 318 return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep, 319 lengthp)); 320 } 321 322 static int 323 xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 324 { 325 xdf_t *vdp; 326 ddi_iblock_cookie_t ibc; 327 ddi_iblock_cookie_t softibc; 328 int instance; 329 330 xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, 331 "xdfdebug", 0); 332 333 switch (cmd) { 334 case DDI_ATTACH: 335 break; 336 337 case DDI_RESUME: 338 return (xdf_resume(devi)); 339 340 default: 341 return (DDI_FAILURE); 342 } 343 344 instance = ddi_get_instance(devi); 345 if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) 346 return (DDI_FAILURE); 347 348 DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); 349 vdp = ddi_get_soft_state(vbd_ss, instance); 350 vdp->xdf_dip = devi; 351 if (ddi_get_iblock_cookie(devi, 0, &ibc) != DDI_SUCCESS) { 352 cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", 353 ddi_get_name_addr(devi)); 354 goto errout1; 355 } 356 357 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 358 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 359 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 360 ddi_set_driver_private(devi, vdp); 361 362 if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) 363 != DDI_SUCCESS) { 364 cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", 365 ddi_get_name_addr(devi)); 366 goto errout2; 367 } 368 if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 369 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 370 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 371 ddi_get_name_addr(devi)); 372 goto errout2; 373 } 374 375 /* 376 * create kstat for iostat(1M) 377 */ 378 if ((vdp->xdf_xdev_iostat = kstat_create("xdf", instance, NULL, "disk", 379 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) { 380 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 381 kstat_install(vdp->xdf_xdev_iostat); 382 } else { 383 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 384 ddi_get_name_addr(devi)); 385 goto errout3; 386 } 387 388 /* 389 * driver handles kernel-issued IOCTLs 390 */ 391 if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, 392 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 393 cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", 394 ddi_get_name_addr(devi)); 395 goto errout4; 396 } 397 398 /* 399 * create default device minor nodes: non-removable disk 400 * we will adjust minor nodes after we are connected w/ backend 401 */ 402 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 403 if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, DDI_NT_BLOCK, 404 CMLB_FAKE_LABEL_ONE_PARTITION, vdp->xdf_vd_lbl, NULL) != 0) { 405 cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", 406 ddi_get_name_addr(devi)); 407 goto errout5; 408 } 409 410 /* 411 * We ship with cache-enabled disks 412 */ 413 vdp->xdf_wce = 1; 414 415 
mutex_enter(&vdp->xdf_cb_lk); 416 417 /* Watch backend XenbusState change */ 418 if (xvdi_add_event_handler(devi, XS_OE_STATE, 419 xdf_oe_change) != DDI_SUCCESS) { 420 mutex_exit(&vdp->xdf_cb_lk); 421 goto errout6; 422 } 423 424 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 425 cmn_err(CE_WARN, "xdf@%s: start connection failed", 426 ddi_get_name_addr(devi)); 427 (void) xdf_start_disconnect(vdp); 428 mutex_exit(&vdp->xdf_cb_lk); 429 goto errout7; 430 } 431 432 mutex_exit(&vdp->xdf_cb_lk); 433 434 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 435 offsetof(v_req_t, v_link)); 436 list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), 437 offsetof(ge_slot_t, link)); 438 439 ddi_report_dev(devi); 440 DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); 441 442 return (DDI_SUCCESS); 443 444 errout7: 445 xvdi_remove_event_handler(devi, XS_OE_STATE); 446 errout6: 447 cmlb_detach(vdp->xdf_vd_lbl, NULL); 448 errout5: 449 cmlb_free_handle(&vdp->xdf_vd_lbl); 450 ddi_prop_remove_all(devi); 451 errout4: 452 kstat_delete(vdp->xdf_xdev_iostat); 453 errout3: 454 ddi_remove_softintr(vdp->xdf_softintr_id); 455 errout2: 456 ddi_set_driver_private(devi, NULL); 457 cv_destroy(&vdp->xdf_dev_cv); 458 mutex_destroy(&vdp->xdf_cb_lk); 459 mutex_destroy(&vdp->xdf_dev_lk); 460 errout1: 461 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); 462 ddi_soft_state_free(vbd_ss, instance); 463 return (DDI_FAILURE); 464 } 465 466 static int 467 xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 468 { 469 xdf_t *vdp; 470 int instance; 471 472 switch (cmd) { 473 474 case DDI_PM_SUSPEND: 475 break; 476 477 case DDI_SUSPEND: 478 return (xdf_suspend(devi)); 479 480 case DDI_DETACH: 481 break; 482 483 default: 484 return (DDI_FAILURE); 485 } 486 487 instance = ddi_get_instance(devi); 488 DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); 489 vdp = ddi_get_soft_state(vbd_ss, instance); 490 491 if (vdp == NULL) 492 return (DDI_FAILURE); 493 494 mutex_enter(&vdp->xdf_dev_lk); 495 if (xdf_isopen(vdp, -1)) { 496 mutex_exit(&vdp->xdf_dev_lk); 497 return (DDI_FAILURE); 498 } 499 500 if (vdp->xdf_status != XD_CLOSED) { 501 mutex_exit(&vdp->xdf_dev_lk); 502 return (DDI_FAILURE); 503 } 504 505 ASSERT(!ISDMACBON(vdp)); 506 mutex_exit(&vdp->xdf_dev_lk); 507 508 if (vdp->xdf_timeout_id != 0) 509 (void) untimeout(vdp->xdf_timeout_id); 510 511 xvdi_remove_event_handler(devi, XS_OE_STATE); 512 513 /* we'll support backend running in domU later */ 514 #ifdef DOMU_BACKEND 515 (void) xvdi_post_event(devi, XEN_HP_REMOVE); 516 #endif 517 518 list_destroy(&vdp->xdf_vreq_act); 519 list_destroy(&vdp->xdf_gs_act); 520 ddi_prop_remove_all(devi); 521 kstat_delete(vdp->xdf_xdev_iostat); 522 ddi_remove_softintr(vdp->xdf_softintr_id); 523 ddi_set_driver_private(devi, NULL); 524 cv_destroy(&vdp->xdf_dev_cv); 525 mutex_destroy(&vdp->xdf_cb_lk); 526 mutex_destroy(&vdp->xdf_dev_lk); 527 if (vdp->xdf_cache_flush_block != NULL) 528 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 529 ddi_soft_state_free(vbd_ss, instance); 530 return (DDI_SUCCESS); 531 } 532 533 static int 534 xdf_suspend(dev_info_t *devi) 535 { 536 xdf_t *vdp; 537 int instance; 538 enum xdf_state st; 539 540 instance = ddi_get_instance(devi); 541 542 if (xdfdebug & SUSRES_DBG) 543 xen_printf("xdf_suspend: xdf#%d\n", instance); 544 545 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 546 return (DDI_FAILURE); 547 548 xvdi_suspend(devi); 549 550 mutex_enter(&vdp->xdf_cb_lk); 551 mutex_enter(&vdp->xdf_dev_lk); 552 st = vdp->xdf_status; 553 /* change status to stop further I/O requests 
*/ 554 if (st == XD_READY) 555 vdp->xdf_status = XD_SUSPEND; 556 mutex_exit(&vdp->xdf_dev_lk); 557 mutex_exit(&vdp->xdf_cb_lk); 558 559 /* make sure no more I/O responses left in the ring buffer */ 560 if ((st == XD_INIT) || (st == XD_READY)) { 561 #ifdef XPV_HVM_DRIVER 562 ec_unbind_evtchn(vdp->xdf_evtchn); 563 #else 564 (void) ddi_remove_intr(devi, 0, NULL); 565 #endif 566 (void) xdf_drain_io(vdp); 567 /* 568 * no need to teardown the ring buffer here 569 * it will be simply re-init'ed during resume when 570 * we call xvdi_alloc_ring 571 */ 572 } 573 574 if (xdfdebug & SUSRES_DBG) 575 xen_printf("xdf_suspend: SUCCESS\n"); 576 577 return (DDI_SUCCESS); 578 } 579 580 /*ARGSUSED*/ 581 static int 582 xdf_resume(dev_info_t *devi) 583 { 584 xdf_t *vdp; 585 int instance; 586 587 instance = ddi_get_instance(devi); 588 if (xdfdebug & SUSRES_DBG) 589 xen_printf("xdf_resume: xdf%d\n", instance); 590 591 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 592 return (DDI_FAILURE); 593 594 mutex_enter(&vdp->xdf_cb_lk); 595 596 if (xvdi_resume(devi) != DDI_SUCCESS) { 597 mutex_exit(&vdp->xdf_cb_lk); 598 return (DDI_FAILURE); 599 } 600 601 mutex_enter(&vdp->xdf_dev_lk); 602 ASSERT(vdp->xdf_status != XD_READY); 603 vdp->xdf_status = XD_UNKNOWN; 604 mutex_exit(&vdp->xdf_dev_lk); 605 606 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 607 mutex_exit(&vdp->xdf_cb_lk); 608 return (DDI_FAILURE); 609 } 610 611 mutex_exit(&vdp->xdf_cb_lk); 612 613 if (xdfdebug & SUSRES_DBG) 614 xen_printf("xdf_resume: done\n"); 615 return (DDI_SUCCESS); 616 } 617 618 /*ARGSUSED*/ 619 static int 620 xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 621 { 622 xdf_t *vdp; 623 int instance; 624 625 instance = ddi_get_instance(devi); 626 DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); 627 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 628 return (DDI_FAILURE); 629 630 /* 631 * wait for any outstanding I/O to complete 632 */ 633 (void) xdf_drain_io(vdp); 634 635 DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); 636 return (DDI_SUCCESS); 637 } 638 639 static int 640 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 641 { 642 minor_t minor; 643 xdf_t *vdp; 644 int part; 645 ulong_t parbit; 646 diskaddr_t p_blkct = 0; 647 boolean_t firstopen; 648 boolean_t nodelay; 649 650 nodelay = (flag & (FNDELAY | FNONBLOCK)); 651 minor = getminor(*devp); 652 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 653 return (ENXIO); 654 655 DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); 656 657 /* do cv_wait until connected or failed */ 658 mutex_enter(&vdp->xdf_dev_lk); 659 if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { 660 mutex_exit(&vdp->xdf_dev_lk); 661 return (ENXIO); 662 } 663 664 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 665 mutex_exit(&vdp->xdf_dev_lk); 666 return (EROFS); 667 } 668 669 part = XDF_PART(minor); 670 parbit = 1 << part; 671 if (vdp->xdf_vd_exclopen & parbit) { 672 mutex_exit(&vdp->xdf_dev_lk); 673 return (EBUSY); 674 } 675 676 /* are we the first one to open this node? 
*/ 677 firstopen = !xdf_isopen(vdp, -1); 678 679 if ((flag & FEXCL) && !firstopen) { 680 mutex_exit(&vdp->xdf_dev_lk); 681 return (EBUSY); 682 } 683 684 if (otyp == OTYP_LYR) 685 vdp->xdf_vd_lyropen[part]++; 686 687 vdp->xdf_vd_open[otyp] |= parbit; 688 689 if (flag & FEXCL) 690 vdp->xdf_vd_exclopen |= parbit; 691 692 mutex_exit(&vdp->xdf_dev_lk); 693 694 /* force a re-validation */ 695 if (firstopen) 696 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 697 698 /* 699 * check size 700 * ignore CD/DVD which contains a zero-sized s0 701 */ 702 if (!nodelay && !XD_IS_CD(vdp) && 703 ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 704 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { 705 (void) xdf_close(*devp, flag, otyp, credp); 706 return (ENXIO); 707 } 708 709 return (0); 710 } 711 712 /*ARGSUSED*/ 713 static int 714 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 715 { 716 minor_t minor; 717 xdf_t *vdp; 718 int part; 719 ulong_t parbit; 720 721 minor = getminor(dev); 722 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 723 return (ENXIO); 724 725 mutex_enter(&vdp->xdf_dev_lk); 726 part = XDF_PART(minor); 727 if (!xdf_isopen(vdp, part)) { 728 mutex_exit(&vdp->xdf_dev_lk); 729 return (ENXIO); 730 } 731 parbit = 1 << part; 732 733 if (otyp == OTYP_LYR) { 734 if (vdp->xdf_vd_lyropen[part] != 0) 735 vdp->xdf_vd_lyropen[part]--; 736 if (vdp->xdf_vd_lyropen[part] == 0) 737 vdp->xdf_vd_open[OTYP_LYR] &= ~parbit; 738 } else { 739 vdp->xdf_vd_open[otyp] &= ~parbit; 740 } 741 vdp->xdf_vd_exclopen &= ~parbit; 742 743 mutex_exit(&vdp->xdf_dev_lk); 744 return (0); 745 } 746 747 static int 748 xdf_strategy(struct buf *bp) 749 { 750 xdf_t *vdp; 751 minor_t minor; 752 diskaddr_t p_blkct, p_blkst; 753 ulong_t nblks; 754 int part; 755 756 minor = getminor(bp->b_edev); 757 part = XDF_PART(minor); 758 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) || 759 !xdf_isopen(vdp, part) || 760 cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 761 &p_blkst, NULL, NULL, NULL)) { 762 bioerror(bp, ENXIO); 763 bp->b_resid = bp->b_bcount; 764 biodone(bp); 765 return (0); 766 } 767 768 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 769 bioerror(bp, EROFS); 770 bp->b_resid = bp->b_bcount; 771 biodone(bp); 772 return (0); 773 } 774 775 /* 776 * starting beyond partition 777 */ 778 if (bp->b_blkno > p_blkct) { 779 DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, 780 (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 781 bioerror(bp, EINVAL); 782 bp->b_resid = bp->b_bcount; 783 biodone(bp); 784 return (0); 785 } 786 787 /* Legacy: don't set error flag at this case */ 788 if (bp->b_blkno == p_blkct) { 789 bp->b_resid = bp->b_bcount; 790 biodone(bp); 791 return (0); 792 } 793 794 /* 795 * adjust for partial transfer 796 */ 797 nblks = bp->b_bcount >> XB_BSHIFT; 798 if ((bp->b_blkno + nblks) > p_blkct) { 799 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 800 bp->b_bcount -= bp->b_resid; 801 } 802 803 804 DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", 805 (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 806 807 mutex_enter(&vdp->xdf_dev_lk); 808 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 809 if (vdp->xdf_f_act == NULL) { 810 vdp->xdf_f_act = vdp->xdf_l_act = bp; 811 } else { 812 vdp->xdf_l_act->av_forw = bp; 813 vdp->xdf_l_act = bp; 814 } 815 bp->av_forw = NULL; 816 bp->av_back = NULL; /* not tagged with a v_req */ 817 bp->b_private = (void *)(uintptr_t)p_blkst; 818 mutex_exit(&vdp->xdf_dev_lk); 819 xdf_iostart(vdp); 820 if (do_polled_io) 821 (void) xdf_drain_io(vdp); 822 
return (0); 823 } 824 825 /*ARGSUSED*/ 826 static int 827 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 828 { 829 830 xdf_t *vdp; 831 minor_t minor; 832 diskaddr_t p_blkcnt; 833 int part; 834 835 minor = getminor(dev); 836 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 837 return (ENXIO); 838 839 DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", 840 (int64_t)uiop->uio_offset)); 841 842 part = XDF_PART(minor); 843 if (!xdf_isopen(vdp, part)) 844 return (ENXIO); 845 846 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 847 NULL, NULL, NULL, NULL)) 848 return (ENXIO); 849 850 if (U_INVAL(uiop)) 851 return (EINVAL); 852 853 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 854 } 855 856 /*ARGSUSED*/ 857 static int 858 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 859 { 860 xdf_t *vdp; 861 minor_t minor; 862 diskaddr_t p_blkcnt; 863 int part; 864 865 minor = getminor(dev); 866 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 867 return (ENXIO); 868 869 DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", 870 (int64_t)uiop->uio_offset)); 871 872 part = XDF_PART(minor); 873 if (!xdf_isopen(vdp, part)) 874 return (ENXIO); 875 876 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 877 NULL, NULL, NULL, NULL)) 878 return (ENXIO); 879 880 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 881 return (ENOSPC); 882 883 if (U_INVAL(uiop)) 884 return (EINVAL); 885 886 return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); 887 } 888 889 /*ARGSUSED*/ 890 static int 891 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 892 { 893 xdf_t *vdp; 894 minor_t minor; 895 struct uio *uiop = aiop->aio_uio; 896 diskaddr_t p_blkcnt; 897 int part; 898 899 minor = getminor(dev); 900 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 901 return (ENXIO); 902 903 part = XDF_PART(minor); 904 if (!xdf_isopen(vdp, part)) 905 return (ENXIO); 906 907 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 908 NULL, NULL, NULL, NULL)) 909 return (ENXIO); 910 911 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 912 return (ENOSPC); 913 914 if (U_INVAL(uiop)) 915 return (EINVAL); 916 917 return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); 918 } 919 920 /*ARGSUSED*/ 921 static int 922 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 923 { 924 xdf_t *vdp; 925 minor_t minor; 926 struct uio *uiop = aiop->aio_uio; 927 diskaddr_t p_blkcnt; 928 int part; 929 930 minor = getminor(dev); 931 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 932 return (ENXIO); 933 934 part = XDF_PART(minor); 935 if (!xdf_isopen(vdp, part)) 936 return (ENXIO); 937 938 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 939 NULL, NULL, NULL, NULL)) 940 return (ENXIO); 941 942 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 943 return (ENOSPC); 944 945 if (U_INVAL(uiop)) 946 return (EINVAL); 947 948 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); 949 } 950 951 static int 952 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 953 { 954 struct buf dumpbuf, *dbp; 955 xdf_t *vdp; 956 minor_t minor; 957 int err = 0; 958 int part; 959 diskaddr_t p_blkcnt, p_blkst; 960 961 minor = getminor(dev); 962 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 963 return (ENXIO); 964 965 DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", 966 addr, blkno, nblk)); 967 968 part = XDF_PART(minor); 969 if (!xdf_isopen(vdp, part)) 970 return (ENXIO); 971 972 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 973 NULL, NULL, 
NULL)) 974 return (ENXIO); 975 976 if ((blkno + nblk) > p_blkcnt) { 977 cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, 978 blkno + nblk, (uint64_t)vdp->xdf_xdev_nblocks); 979 return (EINVAL); 980 } 981 982 dbp = &dumpbuf; 983 bioinit(dbp); 984 dbp->b_flags = B_BUSY; 985 dbp->b_un.b_addr = addr; 986 dbp->b_bcount = nblk << DEV_BSHIFT; 987 dbp->b_resid = 0; 988 dbp->b_blkno = blkno; 989 dbp->b_edev = dev; 990 dbp->b_private = (void *)(uintptr_t)p_blkst; 991 992 mutex_enter(&vdp->xdf_dev_lk); 993 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 994 if (vdp->xdf_f_act == NULL) { 995 vdp->xdf_f_act = vdp->xdf_l_act = dbp; 996 } else { 997 vdp->xdf_l_act->av_forw = dbp; 998 vdp->xdf_l_act = dbp; 999 } 1000 dbp->av_forw = NULL; 1001 dbp->av_back = NULL; 1002 mutex_exit(&vdp->xdf_dev_lk); 1003 xdf_iostart(vdp); 1004 err = xdf_drain_io(vdp); 1005 biofini(dbp); 1006 return (err); 1007 } 1008 1009 /*ARGSUSED*/ 1010 static int 1011 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1012 int *rvalp) 1013 { 1014 int instance; 1015 xdf_t *vdp; 1016 minor_t minor; 1017 int part; 1018 1019 minor = getminor(dev); 1020 instance = XDF_INST(minor); 1021 1022 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 1023 return (ENXIO); 1024 1025 DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", 1026 instance, cmd, cmd)); 1027 1028 part = XDF_PART(minor); 1029 if (!xdf_isopen(vdp, part)) 1030 return (ENXIO); 1031 1032 switch (cmd) { 1033 case DKIOCGMEDIAINFO: { 1034 struct dk_minfo media_info; 1035 1036 media_info.dki_lbsize = DEV_BSIZE; 1037 media_info.dki_capacity = vdp->xdf_xdev_nblocks; 1038 media_info.dki_media_type = DK_FIXED_DISK; 1039 1040 if (ddi_copyout(&media_info, (void *)arg, 1041 sizeof (struct dk_minfo), mode)) { 1042 return (EFAULT); 1043 } else { 1044 return (0); 1045 } 1046 } 1047 1048 case DKIOCINFO: { 1049 struct dk_cinfo info; 1050 1051 /* controller information */ 1052 if (XD_IS_CD(vdp)) 1053 info.dki_ctype = DKC_CDROM; 1054 else 1055 info.dki_ctype = DKC_VBD; 1056 1057 info.dki_cnum = 0; 1058 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 1059 1060 /* unit information */ 1061 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 1062 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 1063 info.dki_flags = DKI_FMTVOL; 1064 info.dki_partition = part; 1065 info.dki_maxtransfer = maxphys / DEV_BSIZE; 1066 info.dki_addr = 0; 1067 info.dki_space = 0; 1068 info.dki_prio = 0; 1069 info.dki_vec = 0; 1070 1071 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 1072 return (EFAULT); 1073 else 1074 return (0); 1075 } 1076 1077 case DKIOCSTATE: { 1078 enum dkio_state dkstate = DKIO_INSERTED; 1079 if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), 1080 mode) != 0) 1081 return (EFAULT); 1082 return (0); 1083 } 1084 1085 /* 1086 * is media removable? 1087 */ 1088 case DKIOCREMOVABLE: { 1089 int i = XD_IS_RM(vdp) ? 
1 : 0; 1090 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1091 return (EFAULT); 1092 return (0); 1093 } 1094 1095 case DKIOCG_PHYGEOM: 1096 case DKIOCG_VIRTGEOM: 1097 case DKIOCGGEOM: 1098 case DKIOCSGEOM: 1099 case DKIOCGAPART: 1100 case DKIOCGVTOC: 1101 case DKIOCSVTOC: 1102 case DKIOCPARTINFO: 1103 case DKIOCGETEFI: 1104 case DKIOCSETEFI: 1105 case DKIOCPARTITION: { 1106 int rc; 1107 1108 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1109 rvalp, NULL); 1110 return (rc); 1111 } 1112 1113 case DKIOCGETWCE: 1114 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1115 sizeof (vdp->xdf_wce), mode)) 1116 return (EFAULT); 1117 return (0); 1118 case DKIOCSETWCE: 1119 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1120 sizeof (vdp->xdf_wce), mode)) 1121 return (EFAULT); 1122 return (0); 1123 case DKIOCFLUSHWRITECACHE: { 1124 int rc; 1125 struct dk_callback *dkc = (struct dk_callback *)arg; 1126 1127 if (vdp->xdf_flush_supported) { 1128 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1129 NULL, 0, 0, (void *)dev); 1130 } else if (vdp->xdf_feature_barrier && 1131 !xdf_barrier_flush_disable) { 1132 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1133 vdp->xdf_cache_flush_block, xdf_flush_block, 1134 DEV_BSIZE, (void *)dev); 1135 } else { 1136 return (ENOTTY); 1137 } 1138 if ((mode & FKIOCTL) && (dkc != NULL) && 1139 (dkc->dkc_callback != NULL)) { 1140 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1141 /* need to return 0 after calling callback */ 1142 rc = 0; 1143 } 1144 return (rc); 1145 } 1146 1147 default: 1148 return (ENOTTY); 1149 } 1150 } 1151 1152 /* 1153 * xdf interrupt handler 1154 */ 1155 static uint_t 1156 xdf_intr(caddr_t arg) 1157 { 1158 xdf_t *vdp = (xdf_t *)arg; 1159 xendev_ring_t *xbr; 1160 blkif_response_t *resp; 1161 int bioerr; 1162 uint64_t id; 1163 extern int do_polled_io; 1164 uint8_t op; 1165 uint16_t status; 1166 ddi_acc_handle_t acchdl; 1167 1168 mutex_enter(&vdp->xdf_dev_lk); 1169 1170 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1171 mutex_exit(&vdp->xdf_dev_lk); 1172 return (DDI_INTR_UNCLAIMED); 1173 } 1174 1175 acchdl = vdp->xdf_xb_ring_hdl; 1176 1177 /* 1178 * complete all requests which have a response 1179 */ 1180 while (resp = xvdi_ring_get_response(xbr)) { 1181 id = ddi_get64(acchdl, &resp->id); 1182 op = ddi_get8(acchdl, &resp->operation); 1183 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1184 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1185 op, id, status)); 1186 1187 /* 1188 * XXPV - close connection to the backend and restart 1189 */ 1190 if (status != BLKIF_RSP_OKAY) { 1191 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 1192 ddi_get_name_addr(vdp->xdf_dip), 1193 (op == BLKIF_OP_READ) ? 
"reading" : "writing")); 1194 bioerr = EIO; 1195 } else { 1196 bioerr = 0; 1197 } 1198 1199 xdf_iofini(vdp, id, bioerr); 1200 } 1201 1202 mutex_exit(&vdp->xdf_dev_lk); 1203 1204 if (!do_polled_io) 1205 xdf_iostart(vdp); 1206 1207 return (DDI_INTR_CLAIMED); 1208 } 1209 1210 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1211 1212 /* 1213 * Snarf new data if our flush block was re-written 1214 */ 1215 static void 1216 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1217 { 1218 int nblks; 1219 boolean_t mapin; 1220 1221 if (IS_WRITE_BARRIER(vdp, bp)) 1222 return; /* write was a flush write */ 1223 1224 mapin = B_FALSE; 1225 nblks = bp->b_bcount >> DEV_BSHIFT; 1226 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1227 xdf_fbrewrites++; 1228 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1229 mapin = B_TRUE; 1230 bp_mapin(bp); 1231 } 1232 bcopy(bp->b_un.b_addr + 1233 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1234 vdp->xdf_cache_flush_block, DEV_BSIZE); 1235 if (mapin) 1236 bp_mapout(bp); 1237 } 1238 } 1239 1240 static void 1241 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1242 { 1243 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1244 v_req_t *vreq = gs->vreq; 1245 buf_t *bp = vreq->v_buf; 1246 1247 gs_free(vdp, gs); 1248 if (bioerr) 1249 bioerror(bp, bioerr); 1250 vreq->v_nslots--; 1251 if (vreq->v_nslots != 0) 1252 return; 1253 1254 XDF_UPDATE_IO_STAT(vdp, bp); 1255 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1256 1257 if (IS_ERROR(bp)) 1258 bp->b_resid = bp->b_bcount; 1259 1260 vreq_free(vdp, vreq); 1261 biodone(bp); 1262 } 1263 1264 /* 1265 * return value of xdf_prepare_rreq() 1266 * used in xdf_iostart() 1267 */ 1268 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1269 #define XF_COMP 1 /* no more I/O left in buf */ 1270 1271 static void 1272 xdf_iostart(xdf_t *vdp) 1273 { 1274 xendev_ring_t *xbr; 1275 struct buf *bp; 1276 blkif_request_t *rreq; 1277 int retval; 1278 int rreqready = 0; 1279 1280 xbr = vdp->xdf_xb_ring; 1281 1282 /* 1283 * populate the ring request(s) 1284 * 1285 * loop until there is no buf to transfer or no free slot 1286 * available in I/O ring 1287 */ 1288 mutex_enter(&vdp->xdf_dev_lk); 1289 1290 for (;;) { 1291 if (vdp->xdf_status != XD_READY) 1292 break; 1293 1294 /* active buf queue empty? 
*/ 1295 if ((bp = vdp->xdf_f_act) == NULL) 1296 break; 1297 1298 /* try to grab a vreq for this bp */ 1299 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1300 break; 1301 /* alloc DMA/GTE resources */ 1302 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1303 break; 1304 1305 /* get next blkif_request in the ring */ 1306 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1307 break; 1308 bzero(rreq, sizeof (blkif_request_t)); 1309 1310 /* populate blkif_request with this buf */ 1311 rreqready++; 1312 retval = xdf_prepare_rreq(vdp, bp, rreq); 1313 if (retval == XF_COMP) { 1314 /* finish this bp, switch to next one */ 1315 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1316 vdp->xdf_f_act = bp->av_forw; 1317 bp->av_forw = NULL; 1318 } 1319 } 1320 1321 /* 1322 * Send the request(s) to the backend 1323 */ 1324 if (rreqready) { 1325 if (xvdi_ring_push_request(xbr)) { 1326 DPRINTF(IO_DBG, ("xdf_iostart: " 1327 "sent request(s) to backend\n")); 1328 xvdi_notify_oe(vdp->xdf_dip); 1329 } 1330 } 1331 1332 mutex_exit(&vdp->xdf_dev_lk); 1333 } 1334 1335 /* 1336 * populate a single blkif_request_t w/ a buf 1337 */ 1338 static int 1339 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1340 { 1341 int rval; 1342 grant_ref_t gr; 1343 uint8_t fsect, lsect; 1344 size_t bcnt; 1345 paddr_t dma_addr; 1346 off_t blk_off; 1347 dev_info_t *dip = vdp->xdf_dip; 1348 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1349 v_req_t *vreq = BP2VREQ(bp); 1350 uint64_t blkno = vreq->v_blkno; 1351 uint_t ndmacs = vreq->v_ndmacs; 1352 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1353 int seg = 0; 1354 int isread = IS_READ(bp); 1355 1356 if (isread) 1357 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1358 else { 1359 switch (vreq->v_flush_diskcache) { 1360 case FLUSH_DISKCACHE: 1361 ddi_put8(acchdl, &rreq->operation, 1362 BLKIF_OP_FLUSH_DISKCACHE); 1363 ddi_put16(acchdl, &rreq->handle, vdev); 1364 ddi_put64(acchdl, &rreq->id, 1365 (uint64_t)(uintptr_t)(vreq->v_gs)); 1366 ddi_put8(acchdl, &rreq->nr_segments, 0); 1367 return (XF_COMP); 1368 case WRITE_BARRIER: 1369 ddi_put8(acchdl, &rreq->operation, 1370 BLKIF_OP_WRITE_BARRIER); 1371 break; 1372 default: 1373 if (!vdp->xdf_wce) 1374 ddi_put8(acchdl, &rreq->operation, 1375 BLKIF_OP_WRITE_BARRIER); 1376 else 1377 ddi_put8(acchdl, &rreq->operation, 1378 BLKIF_OP_WRITE); 1379 break; 1380 } 1381 } 1382 1383 ddi_put16(acchdl, &rreq->handle, vdev); 1384 ddi_put64(acchdl, &rreq->sector_number, blkno); 1385 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1386 1387 /* 1388 * loop until all segments are populated or no more dma cookie in buf 1389 */ 1390 for (;;) { 1391 /* 1392 * Each segment of a blkif request can transfer up to 1393 * one 4K page of data. 
1394 */ 1395 bcnt = vreq->v_dmac.dmac_size; 1396 ASSERT(bcnt <= PAGESIZE); 1397 ASSERT((bcnt % XB_BSIZE) == 0); 1398 dma_addr = vreq->v_dmac.dmac_laddress; 1399 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1400 ASSERT((blk_off & XB_BMASK) == 0); 1401 fsect = blk_off >> XB_BSHIFT; 1402 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1403 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1404 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1405 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1406 seg, vreq->v_dmac.dmac_size, blk_off)); 1407 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1408 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1409 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1410 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1411 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1412 "\n", seg, fsect, lsect, gr, dma_addr)); 1413 1414 blkno += (bcnt >> XB_BSHIFT); 1415 seg++; 1416 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1417 if (--ndmacs) { 1418 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1419 continue; 1420 } 1421 1422 vreq->v_status = VREQ_DMAWIN_DONE; 1423 vreq->v_blkno = blkno; 1424 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1425 /* last win */ 1426 rval = XF_COMP; 1427 else 1428 rval = XF_PARTIAL; 1429 break; 1430 } 1431 ddi_put8(acchdl, &rreq->nr_segments, seg); 1432 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1433 rreq->id)); 1434 1435 return (rval); 1436 } 1437 1438 #define XDF_QSEC 50000 /* .005 second */ 1439 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1440 1441 static int 1442 xdf_drain_io(xdf_t *vdp) 1443 { 1444 int pollc, rval; 1445 xendev_ring_t *xbr; 1446 1447 if (xdfdebug & SUSRES_DBG) 1448 xen_printf("xdf_drain_io: start\n"); 1449 1450 mutex_enter(&vdp->xdf_dev_lk); 1451 1452 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1453 goto out; 1454 1455 rval = 0; 1456 xbr = vdp->xdf_xb_ring; 1457 ASSERT(xbr != NULL); 1458 1459 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1460 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1461 mutex_exit(&vdp->xdf_dev_lk); 1462 (void) xdf_intr((caddr_t)vdp); 1463 mutex_enter(&vdp->xdf_dev_lk); 1464 } 1465 if (!xvdi_ring_has_incomp_request(xbr)) 1466 goto out; 1467 1468 #ifndef XPV_HVM_DRIVER 1469 (void) HYPERVISOR_yield(); 1470 #endif 1471 /* 1472 * file-backed devices can be slow 1473 */ 1474 drv_usecwait(XDF_QSEC << pollc); 1475 } 1476 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1477 rval = EIO; 1478 out: 1479 mutex_exit(&vdp->xdf_dev_lk); 1480 if (xdfdebug & SUSRES_DBG) 1481 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1482 return (rval); 1483 } 1484 1485 /* ARGSUSED5 */ 1486 static int 1487 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1488 diskaddr_t start, size_t reqlen, void *tg_cookie) 1489 { 1490 xdf_t *vdp; 1491 struct buf *bp; 1492 int err = 0; 1493 1494 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1495 if (vdp == NULL) 1496 return (ENXIO); 1497 1498 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_xdev_nblocks) 1499 return (EINVAL); 1500 1501 bp = getrbuf(KM_SLEEP); 1502 if (cmd == TG_READ) 1503 bp->b_flags = B_BUSY | B_READ; 1504 else 1505 bp->b_flags = B_BUSY | B_WRITE; 1506 bp->b_un.b_addr = bufp; 1507 bp->b_bcount = reqlen; 1508 bp->b_resid = 0; 1509 bp->b_blkno = start; 1510 bp->av_forw = NULL; 1511 bp->av_back = NULL; 1512 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1513 1514 mutex_enter(&vdp->xdf_dev_lk); 1515 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1516 if (vdp->xdf_f_act 
== NULL) { 1517 vdp->xdf_f_act = vdp->xdf_l_act = bp; 1518 } else { 1519 vdp->xdf_l_act->av_forw = bp; 1520 vdp->xdf_l_act = bp; 1521 } 1522 mutex_exit(&vdp->xdf_dev_lk); 1523 xdf_iostart(vdp); 1524 err = biowait(bp); 1525 1526 ASSERT(bp->b_flags & B_DONE); 1527 1528 freerbuf(bp); 1529 return (err); 1530 } 1531 1532 /* 1533 * synthetic geometry 1534 */ 1535 #define XDF_NSECTS 256 1536 #define XDF_NHEADS 16 1537 1538 static int 1539 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1540 { 1541 xdf_t *vdp; 1542 1543 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1544 1545 if (vdp == NULL) 1546 return (ENXIO); 1547 1548 mutex_enter(&vdp->xdf_dev_lk); 1549 *capp = vdp->xdf_xdev_nblocks; 1550 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1551 mutex_exit(&vdp->xdf_dev_lk); 1552 return (0); 1553 } 1554 1555 static int 1556 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1557 { 1558 xdf_t *vdp; 1559 uint_t ncyl; 1560 uint_t spc = XDF_NHEADS * XDF_NSECTS; 1561 1562 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1563 1564 if (vdp == NULL) 1565 return (ENXIO); 1566 1567 ncyl = vdp->xdf_xdev_nblocks / spc; 1568 1569 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1570 geomp->g_acyl = 0; 1571 geomp->g_nhead = XDF_NHEADS; 1572 geomp->g_secsize = XB_BSIZE; 1573 geomp->g_nsect = XDF_NSECTS; 1574 geomp->g_intrlv = 0; 1575 geomp->g_rpm = 7200; 1576 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1577 return (0); 1578 } 1579 1580 /* 1581 * No real HBA, no geometry available from it 1582 */ 1583 /*ARGSUSED*/ 1584 static int 1585 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1586 { 1587 return (EINVAL); 1588 } 1589 1590 static int 1591 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1592 { 1593 xdf_t *vdp; 1594 1595 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1596 return (ENXIO); 1597 1598 if (XD_IS_RO(vdp)) 1599 tgattributep->media_is_writable = 0; 1600 else 1601 tgattributep->media_is_writable = 1; 1602 return (0); 1603 } 1604 1605 /* ARGSUSED3 */ 1606 static int 1607 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1608 { 1609 switch (cmd) { 1610 case TG_GETPHYGEOM: 1611 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1612 case TG_GETVIRTGEOM: 1613 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1614 case TG_GETCAPACITY: 1615 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1616 case TG_GETBLOCKSIZE: 1617 *(uint32_t *)arg = XB_BSIZE; 1618 return (0); 1619 case TG_GETATTR: 1620 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1621 default: 1622 return (ENOTTY); 1623 } 1624 } 1625 1626 /* 1627 * Kick-off connect process 1628 * Status should be XD_UNKNOWN or XD_CLOSED 1629 * On success, status will be changed to XD_INIT 1630 * On error, status won't be changed 1631 */ 1632 static int 1633 xdf_start_connect(xdf_t *vdp) 1634 { 1635 char *xsnode; 1636 grant_ref_t gref; 1637 xenbus_transaction_t xbt; 1638 int rv; 1639 dev_info_t *dip = vdp->xdf_dip; 1640 1641 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1642 goto errout; 1643 1644 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1645 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1646 ddi_get_name_addr(dip)); 1647 goto errout; 1648 } 1649 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1650 #ifdef XPV_HVM_DRIVER 1651 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1652 #else 1653 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1654 DDI_SUCCESS) { 1655 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1656 "failed to add 
intr handler", ddi_get_name_addr(dip)); 1657 goto errout1; 1658 } 1659 #endif 1660 1661 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1662 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1663 DDI_SUCCESS) { 1664 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1665 ddi_get_name_addr(dip)); 1666 goto errout2; 1667 } 1668 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1669 1670 /* 1671 * Write into xenstore the info needed by backend 1672 */ 1673 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1674 cmn_err(CE_WARN, "xdf@%s: " 1675 "failed to get xenstore node path", 1676 ddi_get_name_addr(dip)); 1677 goto fail_trans; 1678 } 1679 trans_retry: 1680 if (xenbus_transaction_start(&xbt)) { 1681 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1682 ddi_get_name_addr(dip)); 1683 xvdi_fatal_error(dip, EIO, "transaction start"); 1684 goto fail_trans; 1685 } 1686 1687 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1688 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1689 ddi_get_name_addr(dip)); 1690 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1691 goto abort_trans; 1692 } 1693 1694 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1695 vdp->xdf_evtchn)) { 1696 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1697 ddi_get_name_addr(dip)); 1698 xvdi_fatal_error(dip, rv, "writing event-channel"); 1699 goto abort_trans; 1700 } 1701 1702 /* 1703 * "protocol" is written by the domain builder in the case of PV 1704 * domains. However, it is not written for HVM domains, so let's 1705 * write it here. 1706 */ 1707 if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", 1708 XEN_IO_PROTO_ABI_NATIVE)) { 1709 cmn_err(CE_WARN, "xdf@%s: failed to write protocol", 1710 ddi_get_name_addr(dip)); 1711 xvdi_fatal_error(dip, rv, "writing protocol"); 1712 goto abort_trans; 1713 } 1714 1715 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1716 cmn_err(CE_WARN, "xdf@%s: " 1717 "failed to switch state to XenbusStateInitialised", 1718 ddi_get_name_addr(dip)); 1719 xvdi_fatal_error(dip, rv, "writing state"); 1720 goto abort_trans; 1721 } 1722 1723 /* kick-off connect process */ 1724 if (rv = xenbus_transaction_end(xbt, 0)) { 1725 if (rv == EAGAIN) 1726 goto trans_retry; 1727 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1728 ddi_get_name_addr(dip)); 1729 xvdi_fatal_error(dip, rv, "completing transaction"); 1730 goto fail_trans; 1731 } 1732 1733 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1734 mutex_enter(&vdp->xdf_dev_lk); 1735 vdp->xdf_status = XD_INIT; 1736 mutex_exit(&vdp->xdf_dev_lk); 1737 1738 return (DDI_SUCCESS); 1739 1740 abort_trans: 1741 (void) xenbus_transaction_end(xbt, 1); 1742 fail_trans: 1743 xvdi_free_ring(vdp->xdf_xb_ring); 1744 errout2: 1745 #ifdef XPV_HVM_DRIVER 1746 ec_unbind_evtchn(vdp->xdf_evtchn); 1747 #else 1748 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1749 #endif 1750 errout1: 1751 xvdi_free_evtchn(dip); 1752 errout: 1753 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1754 ddi_get_name_addr(dip)); 1755 return (DDI_FAILURE); 1756 } 1757 1758 /* 1759 * Kick-off disconnect process 1760 * Status won't be changed 1761 */ 1762 static int 1763 xdf_start_disconnect(xdf_t *vdp) 1764 { 1765 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1766 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1767 ddi_get_name_addr(vdp->xdf_dip)); 1768 return (DDI_FAILURE); 1769 } 1770 1771 return (DDI_SUCCESS); 1772 } 1773 1774 int 1775 xdf_get_flush_block(xdf_t *vdp) 1776 { 1777 
/* 1778 * Get a DEV_BSIZE aligned bufer 1779 */ 1780 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1781 vdp->xdf_cache_flush_block = 1782 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1783 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1784 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1785 return (DDI_FAILURE); 1786 return (DDI_SUCCESS); 1787 } 1788 1789 /* 1790 * Finish other initialization after we've connected to backend 1791 * Status should be XD_INIT before calling this routine 1792 * On success, status should be changed to XD_READY 1793 * On error, status should stay XD_INIT 1794 */ 1795 static int 1796 xdf_post_connect(xdf_t *vdp) 1797 { 1798 int rv; 1799 uint_t len; 1800 char *type; 1801 char *barrier; 1802 dev_info_t *devi = vdp->xdf_dip; 1803 1804 /* 1805 * Determine if feature barrier is supported by backend 1806 */ 1807 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1808 "feature-barrier", (void **)&barrier, &len) == 0) { 1809 vdp->xdf_feature_barrier = 1; 1810 kmem_free(barrier, len); 1811 } else { 1812 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1813 ddi_get_name_addr(vdp->xdf_dip)); 1814 vdp->xdf_feature_barrier = 0; 1815 } 1816 1817 /* probe backend */ 1818 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1819 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1820 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1821 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1822 "cannot read backend info", ddi_get_name_addr(devi)); 1823 xvdi_fatal_error(devi, rv, "reading backend info"); 1824 return (DDI_FAILURE); 1825 } 1826 1827 /* fix disk type */ 1828 if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", 1829 (void **)&type, &len) != 0) { 1830 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1831 "cannot read device-type", ddi_get_name_addr(devi)); 1832 xvdi_fatal_error(devi, rv, "reading device-type"); 1833 return (DDI_FAILURE); 1834 } 1835 if (strcmp(type, "cdrom") == 0) 1836 vdp->xdf_xdev_info |= VDISK_CDROM; 1837 kmem_free(type, len); 1838 1839 /* 1840 * We've created all the minor nodes via cmlb_attach() using default 1841 * value in xdf_attach() to make it possible to block in xdf_open(), 1842 * in case there's anyone (say, booting thread) ever trying to open 1843 * it before connected to backend. We will refresh all those minor 1844 * nodes w/ latest info we've got now when we are almost connected. 1845 * 1846 * Don't do this when xdf is already opened by someone (could happen 1847 * during resume), for that cmlb_attach() will invalid the label info 1848 * and confuse those who has already opened the node, which is bad. 1849 */ 1850 if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { 1851 /* re-init cmlb w/ latest info we got from backend */ 1852 if (cmlb_attach(devi, &xdf_lb_ops, 1853 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 1854 XD_IS_RM(vdp), 1, DDI_NT_BLOCK, 1855 CMLB_FAKE_LABEL_ONE_PARTITION, 1856 vdp->xdf_vd_lbl, NULL) != 0) { 1857 cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", 1858 ddi_get_name_addr(devi)); 1859 return (DDI_FAILURE); 1860 } 1861 } 1862 1863 /* mark vbd is ready for I/O */ 1864 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1865 mutex_enter(&vdp->xdf_dev_lk); 1866 vdp->xdf_status = XD_READY; 1867 mutex_exit(&vdp->xdf_dev_lk); 1868 /* 1869 * If backend has feature-barrier, see if it supports disk 1870 * cache flush op. 
1871 */ 1872 vdp->xdf_flush_supported = 0; 1873 if (vdp->xdf_feature_barrier) { 1874 /* 1875 * Pretend we already know flush is supported so probe 1876 * will attempt the correct op. 1877 */ 1878 vdp->xdf_flush_supported = 1; 1879 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1880 vdp->xdf_flush_supported = 1; 1881 } else { 1882 vdp->xdf_flush_supported = 0; 1883 /* 1884 * If the other end does not support the cache flush op 1885 * then we must use a barrier-write to force disk 1886 * cache flushing. Barrier writes require that a data 1887 * block actually be written. 1888 * Cache a block to barrier-write when we are 1889 * asked to perform a flush. 1890 * XXX - would it be better to just copy 1 block 1891 * (512 bytes) from whatever write we did last 1892 * and rewrite that block? 1893 */ 1894 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1895 return (DDI_FAILURE); 1896 } 1897 } 1898 1899 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 1900 (uint64_t)vdp->xdf_xdev_nblocks); 1901 1902 return (DDI_SUCCESS); 1903 } 1904 1905 /* 1906 * Finish other uninitialization after we've disconnected from backend 1907 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 1908 */ 1909 static void 1910 xdf_post_disconnect(xdf_t *vdp) 1911 { 1912 #ifdef XPV_HVM_DRIVER 1913 ec_unbind_evtchn(vdp->xdf_evtchn); 1914 #else 1915 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1916 #endif 1917 xvdi_free_evtchn(vdp->xdf_dip); 1918 xvdi_free_ring(vdp->xdf_xb_ring); 1919 vdp->xdf_xb_ring = NULL; 1920 vdp->xdf_xb_ring_hdl = NULL; 1921 vdp->xdf_peer = (domid_t)-1; 1922 1923 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1924 mutex_enter(&vdp->xdf_dev_lk); 1925 vdp->xdf_status = XD_CLOSED; 1926 mutex_exit(&vdp->xdf_dev_lk); 1927 } 1928 1929 /*ARGSUSED*/ 1930 static void 1931 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1932 { 1933 XenbusState new_state = *(XenbusState *)impl_data; 1934 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1935 boolean_t unexpect_die = B_FALSE; 1936 int status; 1937 1938 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1939 ddi_get_name_addr(dip), new_state)); 1940 1941 mutex_enter(&vdp->xdf_cb_lk); 1942 1943 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 1944 mutex_exit(&vdp->xdf_cb_lk); 1945 return; 1946 } 1947 1948 switch (new_state) { 1949 case XenbusStateInitialising: 1950 ASSERT(vdp->xdf_status == XD_CLOSED); 1951 /* 1952 * backend recovered from a previous failure, 1953 * kick-off connect process again 1954 */ 1955 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 1956 cmn_err(CE_WARN, "xdf@%s:" 1957 " failed to start reconnecting to backend", 1958 ddi_get_name_addr(dip)); 1959 } 1960 break; 1961 case XenbusStateConnected: 1962 ASSERT(vdp->xdf_status == XD_INIT); 1963 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1964 /* finish final init after connect */ 1965 if (xdf_post_connect(vdp) != DDI_SUCCESS) 1966 (void) xdf_start_disconnect(vdp); 1967 break; 1968 case XenbusStateClosing: 1969 if (vdp->xdf_status == XD_READY) { 1970 mutex_enter(&vdp->xdf_dev_lk); 1971 if (xdf_isopen(vdp, -1)) { 1972 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 1973 "still in use", ddi_get_name_addr(dip)); 1974 mutex_exit(&vdp->xdf_dev_lk); 1975 break; 1976 } else { 1977 vdp->xdf_status = XD_CLOSING; 1978 } 1979 mutex_exit(&vdp->xdf_dev_lk); 1980 } 1981 (void) xdf_start_disconnect(vdp); 1982 break; 1983 case XenbusStateClosed: 1984 /* first check if BE closed unexpectedly */ 1985 
mutex_enter(&vdp->xdf_dev_lk); 1986 if (xdf_isopen(vdp, -1)) { 1987 unexpect_die = B_TRUE; 1988 unexpectedie(vdp); 1989 cmn_err(CE_WARN, "xdf@%s: backend closed, " 1990 "reconnecting...", ddi_get_name_addr(dip)); 1991 } 1992 mutex_exit(&vdp->xdf_dev_lk); 1993 1994 if (vdp->xdf_status == XD_READY) { 1995 mutex_enter(&vdp->xdf_dev_lk); 1996 vdp->xdf_status = XD_CLOSING; 1997 mutex_exit(&vdp->xdf_dev_lk); 1998 1999 #ifdef DOMU_BACKEND 2000 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 2001 #endif 2002 2003 xdf_post_disconnect(vdp); 2004 (void) xvdi_switch_state(dip, XBT_NULL, 2005 XenbusStateClosed); 2006 } else if ((vdp->xdf_status == XD_INIT) || 2007 (vdp->xdf_status == XD_CLOSING)) { 2008 xdf_post_disconnect(vdp); 2009 } else { 2010 mutex_enter(&vdp->xdf_dev_lk); 2011 vdp->xdf_status = XD_CLOSED; 2012 mutex_exit(&vdp->xdf_dev_lk); 2013 } 2014 } 2015 2016 /* notify anybody waiting for oe state change */ 2017 mutex_enter(&vdp->xdf_dev_lk); 2018 cv_broadcast(&vdp->xdf_dev_cv); 2019 mutex_exit(&vdp->xdf_dev_lk); 2020 2021 status = vdp->xdf_status; 2022 mutex_exit(&vdp->xdf_cb_lk); 2023 2024 if (status == XD_READY) { 2025 xdf_iostart(vdp); 2026 } else if ((status == XD_CLOSED) && !unexpect_die) { 2027 /* interface is closed successfully, remove all minor nodes */ 2028 cmlb_detach(vdp->xdf_vd_lbl, NULL); 2029 cmlb_free_handle(&vdp->xdf_vd_lbl); 2030 } 2031 } 2032 2033 /* check if partition is open, -1 - check all partitions on the disk */ 2034 static boolean_t 2035 xdf_isopen(xdf_t *vdp, int partition) 2036 { 2037 int i; 2038 ulong_t parbit; 2039 boolean_t rval = B_FALSE; 2040 2041 if (partition == -1) 2042 parbit = (ulong_t)-1; 2043 else 2044 parbit = 1 << partition; 2045 2046 for (i = 0; i < OTYPCNT; i++) { 2047 if (vdp->xdf_vd_open[i] & parbit) 2048 rval = B_TRUE; 2049 } 2050 2051 return (rval); 2052 } 2053 2054 /* 2055 * Xdf_check_state_transition will check the XenbusState change to see 2056 * if the change is a valid transition or not. 
2057 * The new state is written by backend domain, or by running xenstore-write 2058 * to change it manually in dom0 2059 */ 2060 static int 2061 xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) 2062 { 2063 int status; 2064 int stcheck; 2065 #define STOK 0 /* need further process */ 2066 #define STNOP 1 /* no action need taking */ 2067 #define STBUG 2 /* unexpected state change, could be a bug */ 2068 2069 status = vdp->xdf_status; 2070 stcheck = STOK; 2071 2072 switch (status) { 2073 case XD_UNKNOWN: 2074 if ((oestate == XenbusStateUnknown) || 2075 (oestate == XenbusStateConnected)) 2076 stcheck = STBUG; 2077 else if ((oestate == XenbusStateInitialising) || 2078 (oestate == XenbusStateInitWait) || 2079 (oestate == XenbusStateInitialised)) 2080 stcheck = STNOP; 2081 break; 2082 case XD_INIT: 2083 if (oestate == XenbusStateUnknown) 2084 stcheck = STBUG; 2085 else if ((oestate == XenbusStateInitialising) || 2086 (oestate == XenbusStateInitWait) || 2087 (oestate == XenbusStateInitialised)) 2088 stcheck = STNOP; 2089 break; 2090 case XD_READY: 2091 if ((oestate == XenbusStateUnknown) || 2092 (oestate == XenbusStateInitialising) || 2093 (oestate == XenbusStateInitWait) || 2094 (oestate == XenbusStateInitialised)) 2095 stcheck = STBUG; 2096 else if (oestate == XenbusStateConnected) 2097 stcheck = STNOP; 2098 break; 2099 case XD_CLOSING: 2100 if ((oestate == XenbusStateUnknown) || 2101 (oestate == XenbusStateInitialising) || 2102 (oestate == XenbusStateInitWait) || 2103 (oestate == XenbusStateInitialised) || 2104 (oestate == XenbusStateConnected)) 2105 stcheck = STBUG; 2106 else if (oestate == XenbusStateClosing) 2107 stcheck = STNOP; 2108 break; 2109 case XD_CLOSED: 2110 if ((oestate == XenbusStateUnknown) || 2111 (oestate == XenbusStateConnected)) 2112 stcheck = STBUG; 2113 else if ((oestate == XenbusStateInitWait) || 2114 (oestate == XenbusStateInitialised) || 2115 (oestate == XenbusStateClosing) || 2116 (oestate == XenbusStateClosed)) 2117 stcheck = STNOP; 2118 break; 2119 case XD_SUSPEND: 2120 default: 2121 stcheck = STBUG; 2122 } 2123 2124 if (stcheck == STOK) 2125 return (DDI_SUCCESS); 2126 2127 if (stcheck == STBUG) 2128 cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " 2129 "state change to %d!, when status is %d", 2130 ddi_get_name_addr(vdp->xdf_dip), oestate, status); 2131 2132 return (DDI_FAILURE); 2133 } 2134 2135 static int 2136 xdf_connect(xdf_t *vdp, boolean_t wait) 2137 { 2138 ASSERT(mutex_owned(&vdp->xdf_dev_lk)); 2139 while (vdp->xdf_status != XD_READY) { 2140 if (!wait || (vdp->xdf_status > XD_READY)) 2141 break; 2142 2143 if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0) 2144 break; 2145 } 2146 2147 return (vdp->xdf_status); 2148 } 2149 2150 /* 2151 * callback func when DMA/GTE resources is available 2152 * 2153 * Note: we only register one callback function to grant table subsystem 2154 * since we only have one 'struct gnttab_free_callback' in xdf_t. 
/*
 * Callback invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	ASSERT(vdp != NULL);

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    ddi_get_name_addr(vdp->xdf_dip)));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	ASSERT(vdp != NULL);

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(ISDMACBON(vdp));
	SETDMACBOFF(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* a new timeout thread could be re-scheduled */
	xdf_iostart(vdp);
}

/*
 * Allocate a vreq for this bp.
 * bp->av_back contains the pointer to the vreq upon return.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP2VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
	bp->av_back = (buf_t *)vreq;
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	/* init of other fields in vreq is up to the caller */

	return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t *bp = vreq->v_buf;

	list_remove(&vdp->xdf_vreq_act, (void *)vreq);

	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
		goto done;

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
done:
	vreq->v_buf->av_back = NULL;
	kmem_cache_free(xdf_vreq_cache, vreq);
}

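/*
 * Reading aid (derived from vreq_setup() below; it adds no logic of its
 * own): for an unaligned transfer a vreq normally progresses through
 *
 *	VREQ_INIT -> VREQ_INIT_DONE -> VREQ_DMAHDL_ALLOCED ->
 *	VREQ_MEMDMAHDL_ALLOCED -> VREQ_DMAMEM_ALLOCED ->
 *	VREQ_DMABUF_BOUND -> VREQ_GS_ALLOCED
 *
 * with VREQ_DMAWIN_DONE used between DMA windows of a partial mapping.
 * Aligned transfers skip the bounce-buffer states, and a cache-flush
 * request goes straight from VREQ_INIT to VREQ_GS_ALLOCED.  Because each
 * step records its state before falling through, a request that fails for
 * lack of resources can be resumed later from where it stopped.
 */
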
/*
 * Initialize the DMA and grant table resources for the buf.
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, (
				    "xdf@%s: get ge_slot failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_gs = gs;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->vreq = vreq;
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		bp->b_private = NULL;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc DMA handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work around CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
				    "handle alloc failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);

			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, (
				    "xdf@%s: DMA mem allocation failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get a ge_slot; gs_get() arranges the resource callback on
		 * failure, if it was not set previously
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next DMA window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_dmaw++;
		rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs);
		ASSERT(rc == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* first, try to alloc the GTEs needed for this slot */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init gs_slot */
	list_insert_head(&vdp->xdf_gs_act, (void *)gs);
	gs->oeid = vdp->xdf_peer;
	gs->isread = isread;
	gs->ghead = gh;
	gs->ngrefs = 0;

	return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
	int i;
	grant_ref_t *gp = gs->ge;
	int ngrefs = gs->ngrefs;
	boolean_t isread = gs->isread;

	list_remove(&vdp->xdf_gs_act, (void *)gs);

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < ngrefs; i++, gp++)
		gnttab_end_foreign_access(*gp, !isread, 0);
	gnttab_free_grant_references(gs->ghead);

	kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

	ASSERT(gr != -1);
	ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->ge[gs->ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

	return (gr);
}

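/*
 * Illustrative use of gs_grant() (a sketch, not the driver's actual
 * request-build path): given the machine frame number 'mfn' backing one
 * I/O segment, a blkif ring request segment is filled in roughly as
 *
 *	rreq->seg[i].gref = gs_grant(gs, mfn);
 *	rreq->seg[i].first_sect = first_sect;
 *	rreq->seg[i].last_sect = last_sect;
 *
 * The variable names above are placeholders; see the request-construction
 * code elsewhere in this file for the authoritative version.
 */
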
static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in the ring that have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * move the bufs back to the active list in order;
	 * vreq_busy is updated in vreq_free()
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of the list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}

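/*
 * xdfmin() above is the minphys-style clamp used with physio(); a typical
 * character read entry point (sketch only, mirroring the cb_ops read
 * routine declared earlier) would invoke it along the lines of
 *
 *	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
 *
 * so that no single transfer exceeds xdf_maxphys bytes.
 */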