1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * xdf.c - Xen Virtual Block Device Driver 29 * TODO: 30 * - support alternate block size (currently only DEV_BSIZE supported) 31 * - revalidate geometry for removable devices 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/types.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/dditypes.h> 40 #include <sys/sunddi.h> 41 #include <sys/list.h> 42 #include <sys/cmlb.h> 43 #include <sys/dkio.h> 44 #include <sys/vtoc.h> 45 #include <sys/modctl.h> 46 #include <sys/bootconf.h> 47 #include <sys/promif.h> 48 #include <sys/sysmacros.h> 49 #include <sys/kstat.h> 50 #include <sys/mach_mmu.h> 51 #ifdef XPV_HVM_DRIVER 52 #include <sys/xpv_support.h> 53 #endif 54 #include <public/io/xenbus.h> 55 #include <xen/sys/xenbus_impl.h> 56 #include <xen/sys/xendev.h> 57 #include <sys/gnttab.h> 58 #include <sys/scsi/generic/inquiry.h> 59 #include <io/xdf.h> 60 61 #define FLUSH_DISKCACHE 0x1 62 #define WRITE_BARRIER 0x2 63 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 64 #define USE_WRITE_BARRIER(vdp) \ 65 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 66 #define USE_FLUSH_DISKCACHE(vdp) \ 67 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 68 #define IS_WRITE_BARRIER(vdp, bp) \ 69 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 70 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 71 #define IS_FLUSH_DISKCACHE(bp) \ 72 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 73 74 static void *vbd_ss; 75 static kmem_cache_t *xdf_vreq_cache; 76 static kmem_cache_t *xdf_gs_cache; 77 static int xdf_maxphys = XB_MAXPHYS; 78 int xdfdebug = 0; 79 extern int do_polled_io; 80 diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 81 int xdf_barrier_flush_disable = 0; 82 83 /* 84 * dev_ops and cb_ops entrypoints 85 */ 86 static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 87 static int xdf_attach(dev_info_t *, ddi_attach_cmd_t); 88 static int xdf_detach(dev_info_t *, ddi_detach_cmd_t); 89 static int xdf_reset(dev_info_t *, ddi_reset_cmd_t); 90 static int xdf_open(dev_t *, int, int, cred_t *); 91 static int xdf_close(dev_t, int, int, struct cred *); 92 static int xdf_strategy(struct buf *); 93 static int xdf_read(dev_t, struct uio *, cred_t *); 94 static int xdf_aread(dev_t, struct aio_req *, cred_t *); 95 static int xdf_write(dev_t, struct uio *, cred_t *); 96 static int xdf_awrite(dev_t, struct aio_req *, cred_t *); 97 static int xdf_dump(dev_t, caddr_t, daddr_t, int); 98 static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 99 static uint_t xdf_intr(caddr_t); 100 static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *, 101 caddr_t, int *); 102 103 /* 104 * misc private functions 105 */ 106 static int xdf_suspend(dev_info_t *); 107 static int xdf_resume(dev_info_t *); 108 static int xdf_start_connect(xdf_t *); 109 static int xdf_start_disconnect(xdf_t *); 110 static int xdf_post_connect(xdf_t *); 111 static void xdf_post_disconnect(xdf_t *); 112 static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); 113 static void xdf_iostart(xdf_t *); 114 static void xdf_iofini(xdf_t *, uint64_t, int); 115 static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *); 116 static int xdf_drain_io(xdf_t *); 117 static boolean_t xdf_isopen(xdf_t *, int); 118 static int xdf_check_state_transition(xdf_t *, XenbusState); 119 static int xdf_connect(xdf_t *, boolean_t); 120 static int xdf_dmacallback(caddr_t); 121 static void xdf_timeout_handler(void *); 122 static uint_t xdf_iorestart(caddr_t); 123 static v_req_t *vreq_get(xdf_t *, buf_t *); 124 static void vreq_free(xdf_t *, v_req_t *); 125 static int vreq_setup(xdf_t *, v_req_t *); 126 static ge_slot_t *gs_get(xdf_t *, int); 127 static void gs_free(xdf_t *, ge_slot_t *); 128 static grant_ref_t gs_grant(ge_slot_t *, mfn_t); 129 static void unexpectedie(xdf_t *); 130 static void xdfmin(struct buf *); 131 132 static struct cb_ops xdf_cbops = { 133 xdf_open, 134 xdf_close, 135 xdf_strategy, 136 nodev, 137 xdf_dump, 138 xdf_read, 139 xdf_write, 140 xdf_ioctl, 141 nodev, 142 nodev, 143 nodev, 144 nochpoll, 145 xdf_prop_op, 146 NULL, 147 D_MP | D_NEW | D_64BIT, 148 CB_REV, 149 xdf_aread, 150 xdf_awrite 151 }; 152 153 struct dev_ops xdf_devops = { 154 DEVO_REV, /* devo_rev */ 155 0, /* devo_refcnt */ 156 xdf_getinfo, /* devo_getinfo */ 157 nulldev, /* devo_identify */ 158 nulldev, /* devo_probe */ 159 xdf_attach, /* devo_attach */ 160 xdf_detach, /* devo_detach */ 161 xdf_reset, /* devo_reset */ 162 &xdf_cbops, /* devo_cb_ops */ 163 (struct bus_ops *)NULL /* devo_bus_ops */ 164 }; 165 166 static struct modldrv modldrv = { 167 &mod_driverops, /* Type of module. This one is a driver */ 168 "virtual block driver %I%", /* short description */ 169 &xdf_devops /* driver specific ops */ 170 }; 171 172 static struct modlinkage xdf_modlinkage = { 173 MODREV_1, (void *)&modldrv, NULL 174 }; 175 176 /* 177 * I/O buffer DMA attributes 178 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 179 */ 180 static ddi_dma_attr_t xb_dma_attr = { 181 DMA_ATTR_V0, 182 (uint64_t)0, /* lowest address */ 183 (uint64_t)0xffffffffffffffff, /* highest usable address */ 184 (uint64_t)0xffffff, /* DMA counter limit max */ 185 (uint64_t)XB_BSIZE, /* alignment in bytes */ 186 XB_BSIZE - 1, /* bitmap of burst sizes */ 187 XB_BSIZE, /* min transfer */ 188 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 189 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 190 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 191 XB_BSIZE, /* granularity */ 192 0, /* flags (reserved) */ 193 }; 194 195 static ddi_device_acc_attr_t xc_acc_attr = { 196 DDI_DEVICE_ATTR_V0, 197 DDI_NEVERSWAP_ACC, 198 DDI_STRICTORDER_ACC 199 }; 200 201 /* callbacks from commmon label */ 202 203 static int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 204 void *); 205 static int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 206 207 static cmlb_tg_ops_t xdf_lb_ops = { 208 TG_DK_OPS_VERSION_1, 209 xdf_lb_rdwr, 210 xdf_lb_getinfo 211 }; 212 213 int 214 _init(void) 215 { 216 int rc; 217 218 if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) == 0) { 219 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 220 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 221 ASSERT(xdf_vreq_cache != NULL); 222 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 223 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 224 ASSERT(xdf_gs_cache != NULL); 225 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 226 kmem_cache_destroy(xdf_vreq_cache); 227 kmem_cache_destroy(xdf_gs_cache); 228 ddi_soft_state_fini(&vbd_ss); 229 } 230 } 231 232 return (rc); 233 } 234 235 int 236 _fini(void) 237 { 238 int err; 239 240 if ((err = mod_remove(&xdf_modlinkage)) != 0) 241 return (err); 242 243 kmem_cache_destroy(xdf_vreq_cache); 244 kmem_cache_destroy(xdf_gs_cache); 245 ddi_soft_state_fini(&vbd_ss); 246 247 return (0); 248 } 249 250 int 251 _info(struct modinfo *modinfop) 252 { 253 return (mod_info(&xdf_modlinkage, modinfop)); 254 } 255 256 /*ARGSUSED*/ 257 static int 258 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 259 { 260 int instance; 261 xdf_t *vbdp; 262 263 instance = XDF_INST(getminor((dev_t)arg)); 264 265 switch (cmd) { 266 case DDI_INFO_DEVT2DEVINFO: 267 if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { 268 *rp = NULL; 269 return (DDI_FAILURE); 270 } 271 *rp = vbdp->xdf_dip; 272 return (DDI_SUCCESS); 273 274 case DDI_INFO_DEVT2INSTANCE: 275 *rp = (void *)(uintptr_t)instance; 276 return (DDI_SUCCESS); 277 278 default: 279 return (DDI_FAILURE); 280 } 281 } 282 283 static int 284 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 285 char *name, caddr_t valuep, int *lengthp) 286 { 287 int instance = ddi_get_instance(dip); 288 xdf_t *vdp; 289 diskaddr_t p_blkcnt; 290 291 /* 292 * xdf dynamic properties are device specific and size oriented. 293 * Requests issued under conditions where size is valid are passed 294 * to ddi_prop_op_nblocks with the size information, otherwise the 295 * request is passed to ddi_prop_op. 296 */ 297 vdp = ddi_get_soft_state(vbd_ss, instance); 298 299 if ((dev == DDI_DEV_T_ANY) || (vdp == NULL)) 300 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 301 name, valuep, lengthp)); 302 303 /* do cv_wait until connected or failed */ 304 mutex_enter(&vdp->xdf_dev_lk); 305 if (xdf_connect(vdp, B_TRUE) != XD_READY) { 306 mutex_exit(&vdp->xdf_dev_lk); 307 goto out; 308 } 309 mutex_exit(&vdp->xdf_dev_lk); 310 311 if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt, 312 NULL, NULL, NULL, NULL) == 0) 313 return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 314 name, valuep, lengthp, (uint64_t)p_blkcnt)); 315 316 out: 317 return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep, 318 lengthp)); 319 } 320 321 static int 322 xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 323 { 324 xdf_t *vdp; 325 ddi_iblock_cookie_t ibc; 326 ddi_iblock_cookie_t softibc; 327 int instance; 328 #if defined(XPV_HVM_DRIVER) && defined(__i386) 329 /* XXX: 6609126 32-bit xdf driver panics on a 64-bit dom0 */ 330 extern int xen_is_64bit; 331 332 if (xen_is_64bit) { 333 cmn_err(CE_WARN, "xdf cannot be used in 32-bit domUs on a" 334 " 64-bit dom0."); 335 return (DDI_FAILURE); 336 } 337 #endif 338 339 xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, 340 "xdfdebug", 0); 341 342 switch (cmd) { 343 case DDI_ATTACH: 344 break; 345 346 case DDI_RESUME: 347 return (xdf_resume(devi)); 348 349 default: 350 return (DDI_FAILURE); 351 } 352 353 instance = ddi_get_instance(devi); 354 if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) 355 return (DDI_FAILURE); 356 357 DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); 358 vdp = ddi_get_soft_state(vbd_ss, instance); 359 vdp->xdf_dip = devi; 360 if (ddi_get_iblock_cookie(devi, 0, &ibc) != DDI_SUCCESS) { 361 cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", 362 ddi_get_name_addr(devi)); 363 goto errout1; 364 } 365 366 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 367 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 368 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 369 ddi_set_driver_private(devi, vdp); 370 371 if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) 372 != DDI_SUCCESS) { 373 cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", 374 ddi_get_name_addr(devi)); 375 goto errout2; 376 } 377 if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 378 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 379 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 380 ddi_get_name_addr(devi)); 381 goto errout2; 382 } 383 384 /* 385 * create kstat for iostat(1M) 386 */ 387 if ((vdp->xdf_xdev_iostat = kstat_create("xdf", instance, NULL, "disk", 388 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) { 389 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 390 kstat_install(vdp->xdf_xdev_iostat); 391 } else { 392 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 393 ddi_get_name_addr(devi)); 394 goto errout3; 395 } 396 397 /* 398 * driver handles kernel-issued IOCTLs 399 */ 400 if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, 401 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 402 cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", 403 ddi_get_name_addr(devi)); 404 goto errout4; 405 } 406 407 /* 408 * create default device minor nodes: non-removable disk 409 * we will adjust minor nodes after we are connected w/ backend 410 */ 411 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 412 if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, DDI_NT_BLOCK, 413 CMLB_FAKE_LABEL_ONE_PARTITION, vdp->xdf_vd_lbl, NULL) != 0) { 414 cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", 415 ddi_get_name_addr(devi)); 416 goto errout5; 417 } 418 419 /* 420 * We ship with cache-enabled disks 421 */ 422 vdp->xdf_wce = 1; 423 424 mutex_enter(&vdp->xdf_cb_lk); 425 426 /* Watch backend XenbusState change */ 427 if (xvdi_add_event_handler(devi, XS_OE_STATE, 428 xdf_oe_change) != DDI_SUCCESS) { 429 mutex_exit(&vdp->xdf_cb_lk); 430 goto errout6; 431 } 432 433 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 434 cmn_err(CE_WARN, "xdf@%s: start connection failed", 435 ddi_get_name_addr(devi)); 436 (void) xdf_start_disconnect(vdp); 437 mutex_exit(&vdp->xdf_cb_lk); 438 goto errout7; 439 } 440 441 mutex_exit(&vdp->xdf_cb_lk); 442 443 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 444 offsetof(v_req_t, v_link)); 445 list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), 446 offsetof(ge_slot_t, link)); 447 448 ddi_report_dev(devi); 449 DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); 450 451 return (DDI_SUCCESS); 452 453 errout7: 454 xvdi_remove_event_handler(devi, XS_OE_STATE); 455 errout6: 456 cmlb_detach(vdp->xdf_vd_lbl, NULL); 457 errout5: 458 cmlb_free_handle(&vdp->xdf_vd_lbl); 459 ddi_prop_remove_all(devi); 460 errout4: 461 kstat_delete(vdp->xdf_xdev_iostat); 462 errout3: 463 ddi_remove_softintr(vdp->xdf_softintr_id); 464 errout2: 465 ddi_set_driver_private(devi, NULL); 466 cv_destroy(&vdp->xdf_dev_cv); 467 mutex_destroy(&vdp->xdf_cb_lk); 468 mutex_destroy(&vdp->xdf_dev_lk); 469 errout1: 470 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); 471 ddi_soft_state_free(vbd_ss, instance); 472 return (DDI_FAILURE); 473 } 474 475 static int 476 xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 477 { 478 xdf_t *vdp; 479 int instance; 480 481 switch (cmd) { 482 483 case DDI_PM_SUSPEND: 484 break; 485 486 case DDI_SUSPEND: 487 return (xdf_suspend(devi)); 488 489 case DDI_DETACH: 490 break; 491 492 default: 493 return (DDI_FAILURE); 494 } 495 496 instance = ddi_get_instance(devi); 497 DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); 498 vdp = ddi_get_soft_state(vbd_ss, instance); 499 500 if (vdp == NULL) 501 return (DDI_FAILURE); 502 503 mutex_enter(&vdp->xdf_dev_lk); 504 if (xdf_isopen(vdp, -1)) { 505 mutex_exit(&vdp->xdf_dev_lk); 506 return (DDI_FAILURE); 507 } 508 509 if (vdp->xdf_status != XD_CLOSED) { 510 mutex_exit(&vdp->xdf_dev_lk); 511 return (DDI_FAILURE); 512 } 513 514 ASSERT(!ISDMACBON(vdp)); 515 mutex_exit(&vdp->xdf_dev_lk); 516 517 if (vdp->xdf_timeout_id != 0) 518 (void) untimeout(vdp->xdf_timeout_id); 519 520 xvdi_remove_event_handler(devi, XS_OE_STATE); 521 522 /* we'll support backend running in domU later */ 523 #ifdef DOMU_BACKEND 524 (void) xvdi_post_event(devi, XEN_HP_REMOVE); 525 #endif 526 527 list_destroy(&vdp->xdf_vreq_act); 528 list_destroy(&vdp->xdf_gs_act); 529 ddi_prop_remove_all(devi); 530 kstat_delete(vdp->xdf_xdev_iostat); 531 ddi_remove_softintr(vdp->xdf_softintr_id); 532 ddi_set_driver_private(devi, NULL); 533 cv_destroy(&vdp->xdf_dev_cv); 534 mutex_destroy(&vdp->xdf_cb_lk); 535 mutex_destroy(&vdp->xdf_dev_lk); 536 if (vdp->xdf_cache_flush_block != NULL) 537 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 538 ddi_soft_state_free(vbd_ss, instance); 539 return (DDI_SUCCESS); 540 } 541 542 static int 543 xdf_suspend(dev_info_t *devi) 544 { 545 xdf_t *vdp; 546 int instance; 547 enum xdf_state st; 548 549 instance = ddi_get_instance(devi); 550 551 if (xdfdebug & SUSRES_DBG) 552 xen_printf("xdf_suspend: xdf#%d\n", instance); 553 554 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 555 return (DDI_FAILURE); 556 557 xvdi_suspend(devi); 558 559 mutex_enter(&vdp->xdf_cb_lk); 560 mutex_enter(&vdp->xdf_dev_lk); 561 st = vdp->xdf_status; 562 /* change status to stop further I/O requests */ 563 if (st == XD_READY) 564 vdp->xdf_status = XD_SUSPEND; 565 mutex_exit(&vdp->xdf_dev_lk); 566 mutex_exit(&vdp->xdf_cb_lk); 567 568 /* make sure no more I/O responses left in the ring buffer */ 569 if ((st == XD_INIT) || (st == XD_READY)) { 570 #ifdef XPV_HVM_DRIVER 571 ec_unbind_evtchn(vdp->xdf_evtchn); 572 #else 573 (void) ddi_remove_intr(devi, 0, NULL); 574 #endif 575 (void) xdf_drain_io(vdp); 576 /* 577 * no need to teardown the ring buffer here 578 * it will be simply re-init'ed during resume when 579 * we call xvdi_alloc_ring 580 */ 581 } 582 583 if (xdfdebug & SUSRES_DBG) 584 xen_printf("xdf_suspend: SUCCESS\n"); 585 586 return (DDI_SUCCESS); 587 } 588 589 /*ARGSUSED*/ 590 static int 591 xdf_resume(dev_info_t *devi) 592 { 593 xdf_t *vdp; 594 int instance; 595 596 instance = ddi_get_instance(devi); 597 if (xdfdebug & SUSRES_DBG) 598 xen_printf("xdf_resume: xdf%d\n", instance); 599 600 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 601 return (DDI_FAILURE); 602 603 mutex_enter(&vdp->xdf_cb_lk); 604 605 if (xvdi_resume(devi) != DDI_SUCCESS) { 606 mutex_exit(&vdp->xdf_cb_lk); 607 return (DDI_FAILURE); 608 } 609 610 mutex_enter(&vdp->xdf_dev_lk); 611 ASSERT(vdp->xdf_status != XD_READY); 612 vdp->xdf_status = XD_UNKNOWN; 613 mutex_exit(&vdp->xdf_dev_lk); 614 615 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 616 mutex_exit(&vdp->xdf_cb_lk); 617 return (DDI_FAILURE); 618 } 619 620 mutex_exit(&vdp->xdf_cb_lk); 621 622 if (xdfdebug & SUSRES_DBG) 623 xen_printf("xdf_resume: done\n"); 624 return (DDI_SUCCESS); 625 } 626 627 /*ARGSUSED*/ 628 static int 629 xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 630 { 631 xdf_t *vdp; 632 int instance; 633 634 instance = ddi_get_instance(devi); 635 DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); 636 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 637 return (DDI_FAILURE); 638 639 /* 640 * wait for any outstanding I/O to complete 641 */ 642 (void) xdf_drain_io(vdp); 643 644 DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); 645 return (DDI_SUCCESS); 646 } 647 648 static int 649 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 650 { 651 minor_t minor; 652 xdf_t *vdp; 653 int part; 654 ulong_t parbit; 655 diskaddr_t p_blkct = 0; 656 boolean_t firstopen; 657 boolean_t nodelay; 658 659 nodelay = (flag & (FNDELAY | FNONBLOCK)); 660 minor = getminor(*devp); 661 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 662 return (ENXIO); 663 664 DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); 665 666 /* do cv_wait until connected or failed */ 667 mutex_enter(&vdp->xdf_dev_lk); 668 if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { 669 mutex_exit(&vdp->xdf_dev_lk); 670 return (ENXIO); 671 } 672 673 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 674 mutex_exit(&vdp->xdf_dev_lk); 675 return (EROFS); 676 } 677 678 part = XDF_PART(minor); 679 parbit = 1 << part; 680 if (vdp->xdf_vd_exclopen & parbit) { 681 mutex_exit(&vdp->xdf_dev_lk); 682 return (EBUSY); 683 } 684 685 /* are we the first one to open this node? */ 686 firstopen = !xdf_isopen(vdp, -1); 687 688 if ((flag & FEXCL) && !firstopen) { 689 mutex_exit(&vdp->xdf_dev_lk); 690 return (EBUSY); 691 } 692 693 if (otyp == OTYP_LYR) 694 vdp->xdf_vd_lyropen[part]++; 695 696 vdp->xdf_vd_open[otyp] |= parbit; 697 698 if (flag & FEXCL) 699 vdp->xdf_vd_exclopen |= parbit; 700 701 mutex_exit(&vdp->xdf_dev_lk); 702 703 /* force a re-validation */ 704 if (firstopen) 705 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 706 707 /* 708 * check size 709 * ignore CD/DVD which contains a zero-sized s0 710 */ 711 if (!nodelay && !XD_IS_CD(vdp) && 712 ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 713 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { 714 (void) xdf_close(*devp, flag, otyp, credp); 715 return (ENXIO); 716 } 717 718 return (0); 719 } 720 721 /*ARGSUSED*/ 722 static int 723 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 724 { 725 minor_t minor; 726 xdf_t *vdp; 727 int part; 728 ulong_t parbit; 729 730 minor = getminor(dev); 731 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 732 return (ENXIO); 733 734 mutex_enter(&vdp->xdf_dev_lk); 735 part = XDF_PART(minor); 736 if (!xdf_isopen(vdp, part)) { 737 mutex_exit(&vdp->xdf_dev_lk); 738 return (ENXIO); 739 } 740 parbit = 1 << part; 741 742 if (otyp == OTYP_LYR) { 743 if (vdp->xdf_vd_lyropen[part] != 0) 744 vdp->xdf_vd_lyropen[part]--; 745 if (vdp->xdf_vd_lyropen[part] == 0) 746 vdp->xdf_vd_open[OTYP_LYR] &= ~parbit; 747 } else { 748 vdp->xdf_vd_open[otyp] &= ~parbit; 749 } 750 vdp->xdf_vd_exclopen &= ~parbit; 751 752 mutex_exit(&vdp->xdf_dev_lk); 753 return (0); 754 } 755 756 static int 757 xdf_strategy(struct buf *bp) 758 { 759 xdf_t *vdp; 760 minor_t minor; 761 diskaddr_t p_blkct, p_blkst; 762 ulong_t nblks; 763 int part; 764 765 minor = getminor(bp->b_edev); 766 part = XDF_PART(minor); 767 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) || 768 !xdf_isopen(vdp, part) || 769 cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 770 &p_blkst, NULL, NULL, NULL)) { 771 bioerror(bp, ENXIO); 772 bp->b_resid = bp->b_bcount; 773 biodone(bp); 774 return (0); 775 } 776 777 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 778 bioerror(bp, EROFS); 779 bp->b_resid = bp->b_bcount; 780 biodone(bp); 781 return (0); 782 } 783 784 /* 785 * starting beyond partition 786 */ 787 if (bp->b_blkno > p_blkct) { 788 DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, 789 (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 790 bioerror(bp, EINVAL); 791 bp->b_resid = bp->b_bcount; 792 biodone(bp); 793 return (0); 794 } 795 796 /* Legacy: don't set error flag at this case */ 797 if (bp->b_blkno == p_blkct) { 798 bp->b_resid = bp->b_bcount; 799 biodone(bp); 800 return (0); 801 } 802 803 /* 804 * adjust for partial transfer 805 */ 806 nblks = bp->b_bcount >> XB_BSHIFT; 807 if ((bp->b_blkno + nblks) > p_blkct) { 808 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 809 bp->b_bcount -= bp->b_resid; 810 } 811 812 813 DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", 814 (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 815 816 mutex_enter(&vdp->xdf_dev_lk); 817 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 818 if (vdp->xdf_f_act == NULL) { 819 vdp->xdf_f_act = vdp->xdf_l_act = bp; 820 } else { 821 vdp->xdf_l_act->av_forw = bp; 822 vdp->xdf_l_act = bp; 823 } 824 bp->av_forw = NULL; 825 bp->av_back = NULL; /* not tagged with a v_req */ 826 bp->b_private = (void *)(uintptr_t)p_blkst; 827 mutex_exit(&vdp->xdf_dev_lk); 828 xdf_iostart(vdp); 829 if (do_polled_io) 830 (void) xdf_drain_io(vdp); 831 return (0); 832 } 833 834 /*ARGSUSED*/ 835 static int 836 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 837 { 838 839 xdf_t *vdp; 840 minor_t minor; 841 diskaddr_t p_blkcnt; 842 int part; 843 844 minor = getminor(dev); 845 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 846 return (ENXIO); 847 848 DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", 849 (int64_t)uiop->uio_offset)); 850 851 part = XDF_PART(minor); 852 if (!xdf_isopen(vdp, part)) 853 return (ENXIO); 854 855 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 856 NULL, NULL, NULL, NULL)) 857 return (ENXIO); 858 859 if (U_INVAL(uiop)) 860 return (EINVAL); 861 862 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 863 } 864 865 /*ARGSUSED*/ 866 static int 867 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 868 { 869 xdf_t *vdp; 870 minor_t minor; 871 diskaddr_t p_blkcnt; 872 int part; 873 874 minor = getminor(dev); 875 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 876 return (ENXIO); 877 878 DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", 879 (int64_t)uiop->uio_offset)); 880 881 part = XDF_PART(minor); 882 if (!xdf_isopen(vdp, part)) 883 return (ENXIO); 884 885 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 886 NULL, NULL, NULL, NULL)) 887 return (ENXIO); 888 889 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 890 return (ENOSPC); 891 892 if (U_INVAL(uiop)) 893 return (EINVAL); 894 895 return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); 896 } 897 898 /*ARGSUSED*/ 899 static int 900 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 901 { 902 xdf_t *vdp; 903 minor_t minor; 904 struct uio *uiop = aiop->aio_uio; 905 diskaddr_t p_blkcnt; 906 int part; 907 908 minor = getminor(dev); 909 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 910 return (ENXIO); 911 912 part = XDF_PART(minor); 913 if (!xdf_isopen(vdp, part)) 914 return (ENXIO); 915 916 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 917 NULL, NULL, NULL, NULL)) 918 return (ENXIO); 919 920 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 921 return (ENOSPC); 922 923 if (U_INVAL(uiop)) 924 return (EINVAL); 925 926 return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); 927 } 928 929 /*ARGSUSED*/ 930 static int 931 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 932 { 933 xdf_t *vdp; 934 minor_t minor; 935 struct uio *uiop = aiop->aio_uio; 936 diskaddr_t p_blkcnt; 937 int part; 938 939 minor = getminor(dev); 940 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 941 return (ENXIO); 942 943 part = XDF_PART(minor); 944 if (!xdf_isopen(vdp, part)) 945 return (ENXIO); 946 947 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 948 NULL, NULL, NULL, NULL)) 949 return (ENXIO); 950 951 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 952 return (ENOSPC); 953 954 if (U_INVAL(uiop)) 955 return (EINVAL); 956 957 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); 958 } 959 960 static int 961 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 962 { 963 struct buf dumpbuf, *dbp; 964 xdf_t *vdp; 965 minor_t minor; 966 int err = 0; 967 int part; 968 diskaddr_t p_blkcnt, p_blkst; 969 970 minor = getminor(dev); 971 if (!(vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)))) 972 return (ENXIO); 973 974 DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", 975 addr, blkno, nblk)); 976 977 part = XDF_PART(minor); 978 if (!xdf_isopen(vdp, part)) 979 return (ENXIO); 980 981 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 982 NULL, NULL, NULL)) 983 return (ENXIO); 984 985 if ((blkno + nblk) > p_blkcnt) { 986 cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, 987 blkno + nblk, (uint64_t)vdp->xdf_xdev_nblocks); 988 return (EINVAL); 989 } 990 991 dbp = &dumpbuf; 992 bioinit(dbp); 993 dbp->b_flags = B_BUSY; 994 dbp->b_un.b_addr = addr; 995 dbp->b_bcount = nblk << DEV_BSHIFT; 996 dbp->b_resid = 0; 997 dbp->b_blkno = blkno; 998 dbp->b_edev = dev; 999 dbp->b_private = (void *)(uintptr_t)p_blkst; 1000 1001 mutex_enter(&vdp->xdf_dev_lk); 1002 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1003 if (vdp->xdf_f_act == NULL) { 1004 vdp->xdf_f_act = vdp->xdf_l_act = dbp; 1005 } else { 1006 vdp->xdf_l_act->av_forw = dbp; 1007 vdp->xdf_l_act = dbp; 1008 } 1009 dbp->av_forw = NULL; 1010 dbp->av_back = NULL; 1011 mutex_exit(&vdp->xdf_dev_lk); 1012 xdf_iostart(vdp); 1013 err = xdf_drain_io(vdp); 1014 biofini(dbp); 1015 return (err); 1016 } 1017 1018 /*ARGSUSED*/ 1019 static int 1020 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1021 int *rvalp) 1022 { 1023 int instance; 1024 xdf_t *vdp; 1025 minor_t minor; 1026 int part; 1027 1028 minor = getminor(dev); 1029 instance = XDF_INST(minor); 1030 1031 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 1032 return (ENXIO); 1033 1034 DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", 1035 instance, cmd, cmd)); 1036 1037 part = XDF_PART(minor); 1038 if (!xdf_isopen(vdp, part)) 1039 return (ENXIO); 1040 1041 switch (cmd) { 1042 case DKIOCGMEDIAINFO: { 1043 struct dk_minfo media_info; 1044 1045 media_info.dki_lbsize = DEV_BSIZE; 1046 media_info.dki_capacity = vdp->xdf_xdev_nblocks; 1047 media_info.dki_media_type = DK_FIXED_DISK; 1048 1049 if (ddi_copyout(&media_info, (void *)arg, 1050 sizeof (struct dk_minfo), mode)) { 1051 return (EFAULT); 1052 } else { 1053 return (0); 1054 } 1055 } 1056 1057 case DKIOCINFO: { 1058 struct dk_cinfo info; 1059 1060 /* controller information */ 1061 if (XD_IS_CD(vdp)) 1062 info.dki_ctype = DKC_CDROM; 1063 else 1064 info.dki_ctype = DKC_VBD; 1065 1066 info.dki_cnum = 0; 1067 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 1068 1069 /* unit information */ 1070 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 1071 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 1072 info.dki_flags = DKI_FMTVOL; 1073 info.dki_partition = part; 1074 info.dki_maxtransfer = maxphys / DEV_BSIZE; 1075 info.dki_addr = 0; 1076 info.dki_space = 0; 1077 info.dki_prio = 0; 1078 info.dki_vec = 0; 1079 1080 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 1081 return (EFAULT); 1082 else 1083 return (0); 1084 } 1085 1086 case DKIOCSTATE: { 1087 enum dkio_state dkstate = DKIO_INSERTED; 1088 if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), 1089 mode) != 0) 1090 return (EFAULT); 1091 return (0); 1092 } 1093 1094 /* 1095 * is media removable? 1096 */ 1097 case DKIOCREMOVABLE: { 1098 int i = XD_IS_RM(vdp) ? 1 : 0; 1099 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1100 return (EFAULT); 1101 return (0); 1102 } 1103 1104 case DKIOCG_PHYGEOM: 1105 case DKIOCG_VIRTGEOM: 1106 case DKIOCGGEOM: 1107 case DKIOCSGEOM: 1108 case DKIOCGAPART: 1109 case DKIOCGVTOC: 1110 case DKIOCSVTOC: 1111 case DKIOCPARTINFO: 1112 case DKIOCGETEFI: 1113 case DKIOCSETEFI: 1114 case DKIOCPARTITION: { 1115 int rc; 1116 1117 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1118 rvalp, NULL); 1119 return (rc); 1120 } 1121 1122 case DKIOCGETWCE: 1123 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1124 sizeof (vdp->xdf_wce), mode)) 1125 return (EFAULT); 1126 return (0); 1127 case DKIOCSETWCE: 1128 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1129 sizeof (vdp->xdf_wce), mode)) 1130 return (EFAULT); 1131 return (0); 1132 case DKIOCFLUSHWRITECACHE: { 1133 int rc; 1134 struct dk_callback *dkc = (struct dk_callback *)arg; 1135 1136 if (vdp->xdf_flush_supported) { 1137 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1138 NULL, 0, 0, (void *)dev); 1139 } else if (vdp->xdf_feature_barrier && 1140 !xdf_barrier_flush_disable) { 1141 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1142 vdp->xdf_cache_flush_block, xdf_flush_block, 1143 DEV_BSIZE, (void *)dev); 1144 } else { 1145 return (ENOTTY); 1146 } 1147 if ((mode & FKIOCTL) && (dkc != NULL) && 1148 (dkc->dkc_callback != NULL)) { 1149 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1150 /* need to return 0 after calling callback */ 1151 rc = 0; 1152 } 1153 return (rc); 1154 } 1155 1156 default: 1157 return (ENOTTY); 1158 } 1159 } 1160 1161 /* 1162 * xdf interrupt handler 1163 */ 1164 static uint_t 1165 xdf_intr(caddr_t arg) 1166 { 1167 xdf_t *vdp = (xdf_t *)arg; 1168 xendev_ring_t *xbr; 1169 blkif_response_t *resp; 1170 int bioerr; 1171 uint64_t id; 1172 extern int do_polled_io; 1173 uint8_t op; 1174 uint16_t status; 1175 ddi_acc_handle_t acchdl; 1176 1177 mutex_enter(&vdp->xdf_dev_lk); 1178 1179 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1180 mutex_exit(&vdp->xdf_dev_lk); 1181 return (DDI_INTR_UNCLAIMED); 1182 } 1183 1184 acchdl = vdp->xdf_xb_ring_hdl; 1185 1186 /* 1187 * complete all requests which have a response 1188 */ 1189 while (resp = xvdi_ring_get_response(xbr)) { 1190 id = ddi_get64(acchdl, &resp->id); 1191 op = ddi_get8(acchdl, &resp->operation); 1192 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1193 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1194 op, id, status)); 1195 1196 /* 1197 * XXPV - close connection to the backend and restart 1198 */ 1199 if (status != BLKIF_RSP_OKAY) { 1200 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 1201 ddi_get_name_addr(vdp->xdf_dip), 1202 (op == BLKIF_OP_READ) ? "reading" : "writing")); 1203 bioerr = EIO; 1204 } else { 1205 bioerr = 0; 1206 } 1207 1208 xdf_iofini(vdp, id, bioerr); 1209 } 1210 1211 mutex_exit(&vdp->xdf_dev_lk); 1212 1213 if (!do_polled_io) 1214 xdf_iostart(vdp); 1215 1216 return (DDI_INTR_CLAIMED); 1217 } 1218 1219 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1220 1221 /* 1222 * Snarf new data if our flush block was re-written 1223 */ 1224 static void 1225 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1226 { 1227 int nblks; 1228 boolean_t mapin; 1229 1230 if (IS_WRITE_BARRIER(vdp, bp)) 1231 return; /* write was a flush write */ 1232 1233 mapin = B_FALSE; 1234 nblks = bp->b_bcount >> DEV_BSHIFT; 1235 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1236 xdf_fbrewrites++; 1237 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1238 mapin = B_TRUE; 1239 bp_mapin(bp); 1240 } 1241 bcopy(bp->b_un.b_addr + 1242 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1243 vdp->xdf_cache_flush_block, DEV_BSIZE); 1244 if (mapin) 1245 bp_mapout(bp); 1246 } 1247 } 1248 1249 static void 1250 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1251 { 1252 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1253 v_req_t *vreq = gs->vreq; 1254 buf_t *bp = vreq->v_buf; 1255 1256 gs_free(vdp, gs); 1257 if (bioerr) 1258 bioerror(bp, bioerr); 1259 vreq->v_nslots--; 1260 if (vreq->v_nslots != 0) 1261 return; 1262 1263 XDF_UPDATE_IO_STAT(vdp, bp); 1264 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1265 1266 if (IS_ERROR(bp)) 1267 bp->b_resid = bp->b_bcount; 1268 1269 vreq_free(vdp, vreq); 1270 biodone(bp); 1271 } 1272 1273 /* 1274 * return value of xdf_prepare_rreq() 1275 * used in xdf_iostart() 1276 */ 1277 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1278 #define XF_COMP 1 /* no more I/O left in buf */ 1279 1280 static void 1281 xdf_iostart(xdf_t *vdp) 1282 { 1283 xendev_ring_t *xbr; 1284 struct buf *bp; 1285 blkif_request_t *rreq; 1286 int retval; 1287 int rreqready = 0; 1288 1289 xbr = vdp->xdf_xb_ring; 1290 1291 /* 1292 * populate the ring request(s) 1293 * 1294 * loop until there is no buf to transfer or no free slot 1295 * available in I/O ring 1296 */ 1297 mutex_enter(&vdp->xdf_dev_lk); 1298 1299 for (;;) { 1300 if (vdp->xdf_status != XD_READY) 1301 break; 1302 1303 /* active buf queue empty? */ 1304 if ((bp = vdp->xdf_f_act) == NULL) 1305 break; 1306 1307 /* try to grab a vreq for this bp */ 1308 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1309 break; 1310 /* alloc DMA/GTE resources */ 1311 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1312 break; 1313 1314 /* get next blkif_request in the ring */ 1315 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1316 break; 1317 bzero(rreq, sizeof (blkif_request_t)); 1318 1319 /* populate blkif_request with this buf */ 1320 rreqready++; 1321 retval = xdf_prepare_rreq(vdp, bp, rreq); 1322 if (retval == XF_COMP) { 1323 /* finish this bp, switch to next one */ 1324 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1325 vdp->xdf_f_act = bp->av_forw; 1326 bp->av_forw = NULL; 1327 } 1328 } 1329 1330 /* 1331 * Send the request(s) to the backend 1332 */ 1333 if (rreqready) { 1334 if (xvdi_ring_push_request(xbr)) { 1335 DPRINTF(IO_DBG, ("xdf_iostart: " 1336 "sent request(s) to backend\n")); 1337 xvdi_notify_oe(vdp->xdf_dip); 1338 } 1339 } 1340 1341 mutex_exit(&vdp->xdf_dev_lk); 1342 } 1343 1344 /* 1345 * populate a single blkif_request_t w/ a buf 1346 */ 1347 static int 1348 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1349 { 1350 int rval; 1351 grant_ref_t gr; 1352 uint8_t fsect, lsect; 1353 size_t bcnt; 1354 paddr_t dma_addr; 1355 off_t blk_off; 1356 dev_info_t *dip = vdp->xdf_dip; 1357 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1358 v_req_t *vreq = BP2VREQ(bp); 1359 uint64_t blkno = vreq->v_blkno; 1360 uint_t ndmacs = vreq->v_ndmacs; 1361 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1362 int seg = 0; 1363 int isread = IS_READ(bp); 1364 1365 if (isread) 1366 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1367 else { 1368 switch (vreq->v_flush_diskcache) { 1369 case FLUSH_DISKCACHE: 1370 ddi_put8(acchdl, &rreq->operation, 1371 BLKIF_OP_FLUSH_DISKCACHE); 1372 ddi_put16(acchdl, &rreq->handle, vdev); 1373 ddi_put64(acchdl, &rreq->id, 1374 (uint64_t)(uintptr_t)(vreq->v_gs)); 1375 ddi_put8(acchdl, &rreq->nr_segments, 0); 1376 return (XF_COMP); 1377 case WRITE_BARRIER: 1378 ddi_put8(acchdl, &rreq->operation, 1379 BLKIF_OP_WRITE_BARRIER); 1380 break; 1381 default: 1382 if (!vdp->xdf_wce) 1383 ddi_put8(acchdl, &rreq->operation, 1384 BLKIF_OP_WRITE_BARRIER); 1385 else 1386 ddi_put8(acchdl, &rreq->operation, 1387 BLKIF_OP_WRITE); 1388 break; 1389 } 1390 } 1391 1392 ddi_put16(acchdl, &rreq->handle, vdev); 1393 ddi_put64(acchdl, &rreq->sector_number, blkno); 1394 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1395 1396 /* 1397 * loop until all segments are populated or no more dma cookie in buf 1398 */ 1399 for (;;) { 1400 /* 1401 * Each segment of a blkif request can transfer up to 1402 * one 4K page of data. 1403 */ 1404 bcnt = vreq->v_dmac.dmac_size; 1405 ASSERT(bcnt <= PAGESIZE); 1406 ASSERT((bcnt % XB_BSIZE) == 0); 1407 dma_addr = vreq->v_dmac.dmac_laddress; 1408 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1409 ASSERT((blk_off & XB_BMASK) == 0); 1410 fsect = blk_off >> XB_BSHIFT; 1411 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1412 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1413 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1414 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1415 seg, vreq->v_dmac.dmac_size, blk_off)); 1416 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1417 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1418 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1419 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1420 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1421 "\n", seg, fsect, lsect, gr, dma_addr)); 1422 1423 blkno += (bcnt >> XB_BSHIFT); 1424 seg++; 1425 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1426 if (--ndmacs) { 1427 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1428 continue; 1429 } 1430 1431 vreq->v_status = VREQ_DMAWIN_DONE; 1432 vreq->v_blkno = blkno; 1433 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1434 /* last win */ 1435 rval = XF_COMP; 1436 else 1437 rval = XF_PARTIAL; 1438 break; 1439 } 1440 ddi_put8(acchdl, &rreq->nr_segments, seg); 1441 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1442 rreq->id)); 1443 1444 return (rval); 1445 } 1446 1447 #define XDF_QSEC 50000 /* .005 second */ 1448 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1449 1450 static int 1451 xdf_drain_io(xdf_t *vdp) 1452 { 1453 int pollc, rval; 1454 xendev_ring_t *xbr; 1455 1456 if (xdfdebug & SUSRES_DBG) 1457 xen_printf("xdf_drain_io: start\n"); 1458 1459 mutex_enter(&vdp->xdf_dev_lk); 1460 1461 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1462 goto out; 1463 1464 rval = 0; 1465 xbr = vdp->xdf_xb_ring; 1466 ASSERT(xbr != NULL); 1467 1468 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1469 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1470 mutex_exit(&vdp->xdf_dev_lk); 1471 (void) xdf_intr((caddr_t)vdp); 1472 mutex_enter(&vdp->xdf_dev_lk); 1473 } 1474 if (!xvdi_ring_has_incomp_request(xbr)) 1475 goto out; 1476 1477 #ifndef XPV_HVM_DRIVER 1478 (void) HYPERVISOR_yield(); 1479 #endif 1480 /* 1481 * file-backed devices can be slow 1482 */ 1483 drv_usecwait(XDF_QSEC << pollc); 1484 } 1485 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1486 rval = EIO; 1487 out: 1488 mutex_exit(&vdp->xdf_dev_lk); 1489 if (xdfdebug & SUSRES_DBG) 1490 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1491 return (rval); 1492 } 1493 1494 /* ARGSUSED5 */ 1495 static int 1496 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1497 diskaddr_t start, size_t reqlen, void *tg_cookie) 1498 { 1499 xdf_t *vdp; 1500 struct buf *bp; 1501 int err = 0; 1502 1503 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1504 if (vdp == NULL) 1505 return (ENXIO); 1506 1507 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_xdev_nblocks) 1508 return (EINVAL); 1509 1510 bp = getrbuf(KM_SLEEP); 1511 if (cmd == TG_READ) 1512 bp->b_flags = B_BUSY | B_READ; 1513 else 1514 bp->b_flags = B_BUSY | B_WRITE; 1515 bp->b_un.b_addr = bufp; 1516 bp->b_bcount = reqlen; 1517 bp->b_resid = 0; 1518 bp->b_blkno = start; 1519 bp->av_forw = NULL; 1520 bp->av_back = NULL; 1521 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1522 1523 mutex_enter(&vdp->xdf_dev_lk); 1524 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1525 if (vdp->xdf_f_act == NULL) { 1526 vdp->xdf_f_act = vdp->xdf_l_act = bp; 1527 } else { 1528 vdp->xdf_l_act->av_forw = bp; 1529 vdp->xdf_l_act = bp; 1530 } 1531 mutex_exit(&vdp->xdf_dev_lk); 1532 xdf_iostart(vdp); 1533 err = biowait(bp); 1534 1535 ASSERT(bp->b_flags & B_DONE); 1536 1537 freerbuf(bp); 1538 return (err); 1539 } 1540 1541 /* 1542 * synthetic geometry 1543 */ 1544 #define XDF_NSECTS 256 1545 #define XDF_NHEADS 16 1546 1547 static int 1548 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1549 { 1550 xdf_t *vdp; 1551 1552 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1553 1554 if (vdp == NULL) 1555 return (ENXIO); 1556 1557 mutex_enter(&vdp->xdf_dev_lk); 1558 *capp = vdp->xdf_xdev_nblocks; 1559 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1560 mutex_exit(&vdp->xdf_dev_lk); 1561 return (0); 1562 } 1563 1564 static int 1565 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1566 { 1567 xdf_t *vdp; 1568 uint_t ncyl; 1569 uint_t spc = XDF_NHEADS * XDF_NSECTS; 1570 1571 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1572 1573 if (vdp == NULL) 1574 return (ENXIO); 1575 1576 ncyl = vdp->xdf_xdev_nblocks / spc; 1577 1578 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1579 geomp->g_acyl = 0; 1580 geomp->g_nhead = XDF_NHEADS; 1581 geomp->g_secsize = XB_BSIZE; 1582 geomp->g_nsect = XDF_NSECTS; 1583 geomp->g_intrlv = 0; 1584 geomp->g_rpm = 7200; 1585 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1586 return (0); 1587 } 1588 1589 /* 1590 * No real HBA, no geometry available from it 1591 */ 1592 /*ARGSUSED*/ 1593 static int 1594 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1595 { 1596 return (EINVAL); 1597 } 1598 1599 static int 1600 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1601 { 1602 xdf_t *vdp; 1603 1604 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1605 return (ENXIO); 1606 1607 if (XD_IS_RO(vdp)) 1608 tgattributep->media_is_writable = 0; 1609 else 1610 tgattributep->media_is_writable = 1; 1611 return (0); 1612 } 1613 1614 /* ARGSUSED3 */ 1615 static int 1616 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1617 { 1618 switch (cmd) { 1619 case TG_GETPHYGEOM: 1620 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1621 case TG_GETVIRTGEOM: 1622 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1623 case TG_GETCAPACITY: 1624 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1625 case TG_GETBLOCKSIZE: 1626 *(uint32_t *)arg = XB_BSIZE; 1627 return (0); 1628 case TG_GETATTR: 1629 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1630 default: 1631 return (ENOTTY); 1632 } 1633 } 1634 1635 /* 1636 * Kick-off connect process 1637 * Status should be XD_UNKNOWN or XD_CLOSED 1638 * On success, status will be changed to XD_INIT 1639 * On error, status won't be changed 1640 */ 1641 static int 1642 xdf_start_connect(xdf_t *vdp) 1643 { 1644 char *xsnode; 1645 grant_ref_t gref; 1646 xenbus_transaction_t xbt; 1647 int rv; 1648 dev_info_t *dip = vdp->xdf_dip; 1649 1650 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1651 goto errout; 1652 1653 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1654 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1655 ddi_get_name_addr(dip)); 1656 goto errout; 1657 } 1658 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1659 #ifdef XPV_HVM_DRIVER 1660 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1661 #else 1662 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1663 DDI_SUCCESS) { 1664 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1665 "failed to add intr handler", ddi_get_name_addr(dip)); 1666 goto errout1; 1667 } 1668 #endif 1669 1670 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1671 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1672 DDI_SUCCESS) { 1673 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1674 ddi_get_name_addr(dip)); 1675 goto errout2; 1676 } 1677 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1678 1679 /* 1680 * Write into xenstore the info needed by backend 1681 */ 1682 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1683 cmn_err(CE_WARN, "xdf@%s: " 1684 "failed to get xenstore node path", 1685 ddi_get_name_addr(dip)); 1686 goto fail_trans; 1687 } 1688 trans_retry: 1689 if (xenbus_transaction_start(&xbt)) { 1690 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1691 ddi_get_name_addr(dip)); 1692 xvdi_fatal_error(dip, EIO, "transaction start"); 1693 goto fail_trans; 1694 } 1695 1696 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1697 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1698 ddi_get_name_addr(dip)); 1699 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1700 goto abort_trans; 1701 } 1702 1703 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1704 vdp->xdf_evtchn)) { 1705 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1706 ddi_get_name_addr(dip)); 1707 xvdi_fatal_error(dip, rv, "writing event-channel"); 1708 goto abort_trans; 1709 } 1710 1711 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1712 cmn_err(CE_WARN, "xdf@%s: " 1713 "failed to switch state to XenbusStateInitialised", 1714 ddi_get_name_addr(dip)); 1715 xvdi_fatal_error(dip, rv, "writing state"); 1716 goto abort_trans; 1717 } 1718 1719 /* kick-off connect process */ 1720 if (rv = xenbus_transaction_end(xbt, 0)) { 1721 if (rv == EAGAIN) 1722 goto trans_retry; 1723 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1724 ddi_get_name_addr(dip)); 1725 xvdi_fatal_error(dip, rv, "completing transaction"); 1726 goto fail_trans; 1727 } 1728 1729 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1730 mutex_enter(&vdp->xdf_dev_lk); 1731 vdp->xdf_status = XD_INIT; 1732 mutex_exit(&vdp->xdf_dev_lk); 1733 1734 return (DDI_SUCCESS); 1735 1736 abort_trans: 1737 (void) xenbus_transaction_end(xbt, 1); 1738 fail_trans: 1739 xvdi_free_ring(vdp->xdf_xb_ring); 1740 errout2: 1741 #ifdef XPV_HVM_DRIVER 1742 ec_unbind_evtchn(vdp->xdf_evtchn); 1743 #else 1744 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1745 #endif 1746 errout1: 1747 xvdi_free_evtchn(dip); 1748 errout: 1749 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1750 ddi_get_name_addr(dip)); 1751 return (DDI_FAILURE); 1752 } 1753 1754 /* 1755 * Kick-off disconnect process 1756 * Status won't be changed 1757 */ 1758 static int 1759 xdf_start_disconnect(xdf_t *vdp) 1760 { 1761 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1762 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1763 ddi_get_name_addr(vdp->xdf_dip)); 1764 return (DDI_FAILURE); 1765 } 1766 1767 return (DDI_SUCCESS); 1768 } 1769 1770 int 1771 xdf_get_flush_block(xdf_t *vdp) 1772 { 1773 /* 1774 * Get a DEV_BSIZE aligned bufer 1775 */ 1776 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1777 vdp->xdf_cache_flush_block = 1778 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1779 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1780 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1781 return (DDI_FAILURE); 1782 return (DDI_SUCCESS); 1783 } 1784 1785 /* 1786 * Finish other initialization after we've connected to backend 1787 * Status should be XD_INIT before calling this routine 1788 * On success, status should be changed to XD_READY 1789 * On error, status should stay XD_INIT 1790 */ 1791 static int 1792 xdf_post_connect(xdf_t *vdp) 1793 { 1794 int rv; 1795 uint_t len; 1796 char *type; 1797 char *barrier; 1798 dev_info_t *devi = vdp->xdf_dip; 1799 1800 /* 1801 * Determine if feature barrier is supported by backend 1802 */ 1803 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1804 "feature-barrier", (void **)&barrier, &len) == 0) { 1805 vdp->xdf_feature_barrier = 1; 1806 kmem_free(barrier, len); 1807 } else { 1808 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1809 ddi_get_name_addr(vdp->xdf_dip)); 1810 vdp->xdf_feature_barrier = 0; 1811 } 1812 1813 /* probe backend */ 1814 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1815 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1816 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1817 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1818 "cannot read backend info", ddi_get_name_addr(devi)); 1819 xvdi_fatal_error(devi, rv, "reading backend info"); 1820 return (DDI_FAILURE); 1821 } 1822 1823 /* fix disk type */ 1824 if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", 1825 (void **)&type, &len) != 0) { 1826 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1827 "cannot read device-type", ddi_get_name_addr(devi)); 1828 xvdi_fatal_error(devi, rv, "reading device-type"); 1829 return (DDI_FAILURE); 1830 } 1831 if (strcmp(type, "cdrom") == 0) 1832 vdp->xdf_xdev_info |= VDISK_CDROM; 1833 kmem_free(type, len); 1834 1835 /* 1836 * We've created all the minor nodes via cmlb_attach() using default 1837 * value in xdf_attach() to make it possible to block in xdf_open(), 1838 * in case there's anyone (say, booting thread) ever trying to open 1839 * it before connected to backend. We will refresh all those minor 1840 * nodes w/ latest info we've got now when we are almost connected. 1841 * 1842 * Don't do this when xdf is already opened by someone (could happen 1843 * during resume), for that cmlb_attach() will invalid the label info 1844 * and confuse those who has already opened the node, which is bad. 1845 */ 1846 if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { 1847 /* re-init cmlb w/ latest info we got from backend */ 1848 if (cmlb_attach(devi, &xdf_lb_ops, 1849 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 1850 XD_IS_RM(vdp), 1, DDI_NT_BLOCK, 1851 CMLB_FAKE_LABEL_ONE_PARTITION, 1852 vdp->xdf_vd_lbl, NULL) != 0) { 1853 cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", 1854 ddi_get_name_addr(devi)); 1855 return (DDI_FAILURE); 1856 } 1857 } 1858 1859 /* mark vbd is ready for I/O */ 1860 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1861 mutex_enter(&vdp->xdf_dev_lk); 1862 vdp->xdf_status = XD_READY; 1863 mutex_exit(&vdp->xdf_dev_lk); 1864 /* 1865 * If backend has feature-barrier, see if it supports disk 1866 * cache flush op. 1867 */ 1868 vdp->xdf_flush_supported = 0; 1869 if (vdp->xdf_feature_barrier) { 1870 /* 1871 * Pretend we already know flush is supported so probe 1872 * will attempt the correct op. 1873 */ 1874 vdp->xdf_flush_supported = 1; 1875 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1876 vdp->xdf_flush_supported = 1; 1877 } else { 1878 vdp->xdf_flush_supported = 0; 1879 /* 1880 * If the other end does not support the cache flush op 1881 * then we must use a barrier-write to force disk 1882 * cache flushing. Barrier writes require that a data 1883 * block actually be written. 1884 * Cache a block to barrier-write when we are 1885 * asked to perform a flush. 1886 * XXX - would it be better to just copy 1 block 1887 * (512 bytes) from whatever write we did last 1888 * and rewrite that block? 1889 */ 1890 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1891 return (DDI_FAILURE); 1892 } 1893 } 1894 1895 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 1896 (uint64_t)vdp->xdf_xdev_nblocks); 1897 1898 return (DDI_SUCCESS); 1899 } 1900 1901 /* 1902 * Finish other uninitialization after we've disconnected from backend 1903 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 1904 */ 1905 static void 1906 xdf_post_disconnect(xdf_t *vdp) 1907 { 1908 #ifdef XPV_HVM_DRIVER 1909 ec_unbind_evtchn(vdp->xdf_evtchn); 1910 #else 1911 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1912 #endif 1913 xvdi_free_evtchn(vdp->xdf_dip); 1914 xvdi_free_ring(vdp->xdf_xb_ring); 1915 vdp->xdf_xb_ring = NULL; 1916 vdp->xdf_xb_ring_hdl = NULL; 1917 vdp->xdf_peer = (domid_t)-1; 1918 1919 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1920 mutex_enter(&vdp->xdf_dev_lk); 1921 vdp->xdf_status = XD_CLOSED; 1922 mutex_exit(&vdp->xdf_dev_lk); 1923 } 1924 1925 /*ARGSUSED*/ 1926 static void 1927 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1928 { 1929 XenbusState new_state = *(XenbusState *)impl_data; 1930 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1931 boolean_t unexpect_die = B_FALSE; 1932 int status; 1933 1934 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1935 ddi_get_name_addr(dip), new_state)); 1936 1937 mutex_enter(&vdp->xdf_cb_lk); 1938 1939 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 1940 mutex_exit(&vdp->xdf_cb_lk); 1941 return; 1942 } 1943 1944 switch (new_state) { 1945 case XenbusStateInitialising: 1946 ASSERT(vdp->xdf_status == XD_CLOSED); 1947 /* 1948 * backend recovered from a previous failure, 1949 * kick-off connect process again 1950 */ 1951 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 1952 cmn_err(CE_WARN, "xdf@%s:" 1953 " failed to start reconnecting to backend", 1954 ddi_get_name_addr(dip)); 1955 } 1956 break; 1957 case XenbusStateConnected: 1958 ASSERT(vdp->xdf_status == XD_INIT); 1959 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1960 /* finish final init after connect */ 1961 if (xdf_post_connect(vdp) != DDI_SUCCESS) 1962 (void) xdf_start_disconnect(vdp); 1963 break; 1964 case XenbusStateClosing: 1965 if (vdp->xdf_status == XD_READY) { 1966 mutex_enter(&vdp->xdf_dev_lk); 1967 if (xdf_isopen(vdp, -1)) { 1968 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 1969 "still in use", ddi_get_name_addr(dip)); 1970 mutex_exit(&vdp->xdf_dev_lk); 1971 break; 1972 } else { 1973 vdp->xdf_status = XD_CLOSING; 1974 } 1975 mutex_exit(&vdp->xdf_dev_lk); 1976 } 1977 (void) xdf_start_disconnect(vdp); 1978 break; 1979 case XenbusStateClosed: 1980 /* first check if BE closed unexpectedly */ 1981 mutex_enter(&vdp->xdf_dev_lk); 1982 if (xdf_isopen(vdp, -1)) { 1983 unexpect_die = B_TRUE; 1984 unexpectedie(vdp); 1985 cmn_err(CE_WARN, "xdf@%s: backend closed, " 1986 "reconnecting...", ddi_get_name_addr(dip)); 1987 } 1988 mutex_exit(&vdp->xdf_dev_lk); 1989 1990 if (vdp->xdf_status == XD_READY) { 1991 mutex_enter(&vdp->xdf_dev_lk); 1992 vdp->xdf_status = XD_CLOSING; 1993 mutex_exit(&vdp->xdf_dev_lk); 1994 1995 #ifdef DOMU_BACKEND 1996 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1997 #endif 1998 1999 xdf_post_disconnect(vdp); 2000 (void) xvdi_switch_state(dip, XBT_NULL, 2001 XenbusStateClosed); 2002 } else if ((vdp->xdf_status == XD_INIT) || 2003 (vdp->xdf_status == XD_CLOSING)) { 2004 xdf_post_disconnect(vdp); 2005 } else { 2006 mutex_enter(&vdp->xdf_dev_lk); 2007 vdp->xdf_status = XD_CLOSED; 2008 mutex_exit(&vdp->xdf_dev_lk); 2009 } 2010 } 2011 2012 /* notify anybody waiting for oe state change */ 2013 mutex_enter(&vdp->xdf_dev_lk); 2014 cv_broadcast(&vdp->xdf_dev_cv); 2015 mutex_exit(&vdp->xdf_dev_lk); 2016 2017 status = vdp->xdf_status; 2018 mutex_exit(&vdp->xdf_cb_lk); 2019 2020 if (status == XD_READY) { 2021 xdf_iostart(vdp); 2022 } else if ((status == XD_CLOSED) && !unexpect_die) { 2023 /* interface is closed successfully, remove all minor nodes */ 2024 cmlb_detach(vdp->xdf_vd_lbl, NULL); 2025 cmlb_free_handle(&vdp->xdf_vd_lbl); 2026 } 2027 } 2028 2029 /* check if partition is open, -1 - check all partitions on the disk */ 2030 static boolean_t 2031 xdf_isopen(xdf_t *vdp, int partition) 2032 { 2033 int i; 2034 ulong_t parbit; 2035 boolean_t rval = B_FALSE; 2036 2037 if (partition == -1) 2038 parbit = (ulong_t)-1; 2039 else 2040 parbit = 1 << partition; 2041 2042 for (i = 0; i < OTYPCNT; i++) { 2043 if (vdp->xdf_vd_open[i] & parbit) 2044 rval = B_TRUE; 2045 } 2046 2047 return (rval); 2048 } 2049 2050 /* 2051 * Xdf_check_state_transition will check the XenbusState change to see 2052 * if the change is a valid transition or not. 2053 * The new state is written by backend domain, or by running xenstore-write 2054 * to change it manually in dom0 2055 */ 2056 static int 2057 xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) 2058 { 2059 int status; 2060 int stcheck; 2061 #define STOK 0 /* need further process */ 2062 #define STNOP 1 /* no action need taking */ 2063 #define STBUG 2 /* unexpected state change, could be a bug */ 2064 2065 status = vdp->xdf_status; 2066 stcheck = STOK; 2067 2068 switch (status) { 2069 case XD_UNKNOWN: 2070 if ((oestate == XenbusStateUnknown) || 2071 (oestate == XenbusStateConnected)) 2072 stcheck = STBUG; 2073 else if ((oestate == XenbusStateInitialising) || 2074 (oestate == XenbusStateInitWait) || 2075 (oestate == XenbusStateInitialised)) 2076 stcheck = STNOP; 2077 break; 2078 case XD_INIT: 2079 if (oestate == XenbusStateUnknown) 2080 stcheck = STBUG; 2081 else if ((oestate == XenbusStateInitialising) || 2082 (oestate == XenbusStateInitWait) || 2083 (oestate == XenbusStateInitialised)) 2084 stcheck = STNOP; 2085 break; 2086 case XD_READY: 2087 if ((oestate == XenbusStateUnknown) || 2088 (oestate == XenbusStateInitialising) || 2089 (oestate == XenbusStateInitWait) || 2090 (oestate == XenbusStateInitialised)) 2091 stcheck = STBUG; 2092 else if (oestate == XenbusStateConnected) 2093 stcheck = STNOP; 2094 break; 2095 case XD_CLOSING: 2096 if ((oestate == XenbusStateUnknown) || 2097 (oestate == XenbusStateInitialising) || 2098 (oestate == XenbusStateInitWait) || 2099 (oestate == XenbusStateInitialised) || 2100 (oestate == XenbusStateConnected)) 2101 stcheck = STBUG; 2102 else if (oestate == XenbusStateClosing) 2103 stcheck = STNOP; 2104 break; 2105 case XD_CLOSED: 2106 if ((oestate == XenbusStateUnknown) || 2107 (oestate == XenbusStateConnected)) 2108 stcheck = STBUG; 2109 else if ((oestate == XenbusStateInitWait) || 2110 (oestate == XenbusStateInitialised) || 2111 (oestate == XenbusStateClosing) || 2112 (oestate == XenbusStateClosed)) 2113 stcheck = STNOP; 2114 break; 2115 case XD_SUSPEND: 2116 default: 2117 stcheck = STBUG; 2118 } 2119 2120 if (stcheck == STOK) 2121 return (DDI_SUCCESS); 2122 2123 if (stcheck == STBUG) 2124 cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " 2125 "state change to %d!, when status is %d", 2126 ddi_get_name_addr(vdp->xdf_dip), oestate, status); 2127 2128 return (DDI_FAILURE); 2129 } 2130 2131 static int 2132 xdf_connect(xdf_t *vdp, boolean_t wait) 2133 { 2134 ASSERT(mutex_owned(&vdp->xdf_dev_lk)); 2135 while (vdp->xdf_status != XD_READY) { 2136 if (!wait || (vdp->xdf_status > XD_READY)) 2137 break; 2138 2139 if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0) 2140 break; 2141 } 2142 2143 return (vdp->xdf_status); 2144 } 2145 2146 /* 2147 * callback func when DMA/GTE resources is available 2148 * 2149 * Note: we only register one callback function to grant table subsystem 2150 * since we only have one 'struct gnttab_free_callback' in xdf_t. 2151 */ 2152 static int 2153 xdf_dmacallback(caddr_t arg) 2154 { 2155 xdf_t *vdp = (xdf_t *)arg; 2156 ASSERT(vdp != NULL); 2157 2158 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 2159 ddi_get_name_addr(vdp->xdf_dip))); 2160 2161 ddi_trigger_softintr(vdp->xdf_softintr_id); 2162 return (DDI_DMA_CALLBACK_DONE); 2163 } 2164 2165 static uint_t 2166 xdf_iorestart(caddr_t arg) 2167 { 2168 xdf_t *vdp = (xdf_t *)arg; 2169 2170 ASSERT(vdp != NULL); 2171 2172 mutex_enter(&vdp->xdf_dev_lk); 2173 ASSERT(ISDMACBON(vdp)); 2174 SETDMACBOFF(vdp); 2175 mutex_exit(&vdp->xdf_dev_lk); 2176 2177 xdf_iostart(vdp); 2178 2179 return (DDI_INTR_CLAIMED); 2180 } 2181 2182 static void 2183 xdf_timeout_handler(void *arg) 2184 { 2185 xdf_t *vdp = arg; 2186 2187 mutex_enter(&vdp->xdf_dev_lk); 2188 vdp->xdf_timeout_id = 0; 2189 mutex_exit(&vdp->xdf_dev_lk); 2190 2191 /* new timeout thread could be re-scheduled */ 2192 xdf_iostart(vdp); 2193 } 2194 2195 /* 2196 * Alloc a vreq for this bp 2197 * bp->av_back contains the pointer to the vreq upon return 2198 */ 2199 static v_req_t * 2200 vreq_get(xdf_t *vdp, buf_t *bp) 2201 { 2202 v_req_t *vreq = NULL; 2203 2204 ASSERT(BP2VREQ(bp) == NULL); 2205 2206 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 2207 if (vreq == NULL) { 2208 if (vdp->xdf_timeout_id == 0) 2209 /* restart I/O after one second */ 2210 vdp->xdf_timeout_id = 2211 timeout(xdf_timeout_handler, vdp, hz); 2212 return (NULL); 2213 } 2214 bzero(vreq, sizeof (v_req_t)); 2215 2216 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 2217 bp->av_back = (buf_t *)vreq; 2218 vreq->v_buf = bp; 2219 vreq->v_status = VREQ_INIT; 2220 /* init of other fields in vreq is up to the caller */ 2221 2222 return (vreq); 2223 } 2224 2225 static void 2226 vreq_free(xdf_t *vdp, v_req_t *vreq) 2227 { 2228 buf_t *bp = vreq->v_buf; 2229 2230 list_remove(&vdp->xdf_vreq_act, (void *)vreq); 2231 2232 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 2233 goto done; 2234 2235 switch (vreq->v_status) { 2236 case VREQ_DMAWIN_DONE: 2237 case VREQ_GS_ALLOCED: 2238 case VREQ_DMABUF_BOUND: 2239 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 2240 /*FALLTHRU*/ 2241 case VREQ_DMAMEM_ALLOCED: 2242 if (!ALIGNED_XFER(bp)) { 2243 ASSERT(vreq->v_abuf != NULL); 2244 if (!IS_ERROR(bp) && IS_READ(bp)) 2245 bcopy(vreq->v_abuf, bp->b_un.b_addr, 2246 bp->b_bcount); 2247 ddi_dma_mem_free(&vreq->v_align); 2248 } 2249 /*FALLTHRU*/ 2250 case VREQ_MEMDMAHDL_ALLOCED: 2251 if (!ALIGNED_XFER(bp)) 2252 ddi_dma_free_handle(&vreq->v_memdmahdl); 2253 /*FALLTHRU*/ 2254 case VREQ_DMAHDL_ALLOCED: 2255 ddi_dma_free_handle(&vreq->v_dmahdl); 2256 break; 2257 default: 2258 break; 2259 } 2260 done: 2261 vreq->v_buf->av_back = NULL; 2262 kmem_cache_free(xdf_vreq_cache, vreq); 2263 } 2264 2265 /* 2266 * Initalize the DMA and grant table resources for the buf 2267 */ 2268 static int 2269 vreq_setup(xdf_t *vdp, v_req_t *vreq) 2270 { 2271 int rc; 2272 ddi_dma_attr_t dmaattr; 2273 uint_t ndcs, ndws; 2274 ddi_dma_handle_t dh; 2275 ddi_dma_handle_t mdh; 2276 ddi_dma_cookie_t dc; 2277 ddi_acc_handle_t abh; 2278 caddr_t aba; 2279 ge_slot_t *gs; 2280 size_t bufsz; 2281 off_t off; 2282 size_t sz; 2283 buf_t *bp = vreq->v_buf; 2284 int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) | 2285 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 2286 2287 switch (vreq->v_status) { 2288 case VREQ_INIT: 2289 if (IS_FLUSH_DISKCACHE(bp)) { 2290 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 2291 DPRINTF(DMA_DBG, ( 2292 "xdf@%s: get ge_slotfailed\n", 2293 ddi_get_name_addr(vdp->xdf_dip))); 2294 return (DDI_FAILURE); 2295 } 2296 vreq->v_blkno = 0; 2297 vreq->v_nslots = 1; 2298 vreq->v_gs = gs; 2299 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 2300 vreq->v_status = VREQ_GS_ALLOCED; 2301 gs->vreq = vreq; 2302 return (DDI_SUCCESS); 2303 } 2304 2305 if (IS_WRITE_BARRIER(vdp, bp)) 2306 vreq->v_flush_diskcache = WRITE_BARRIER; 2307 vreq->v_blkno = bp->b_blkno + 2308 (diskaddr_t)(uintptr_t)bp->b_private; 2309 bp->b_private = NULL; 2310 /* See if we wrote new data to our flush block */ 2311 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 2312 check_fbwrite(vdp, bp, vreq->v_blkno); 2313 vreq->v_status = VREQ_INIT_DONE; 2314 /*FALLTHRU*/ 2315 2316 case VREQ_INIT_DONE: 2317 /* 2318 * alloc DMA handle 2319 */ 2320 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 2321 xdf_dmacallback, (caddr_t)vdp, &dh); 2322 if (rc != DDI_SUCCESS) { 2323 SETDMACBON(vdp); 2324 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 2325 ddi_get_name_addr(vdp->xdf_dip))); 2326 return (DDI_FAILURE); 2327 } 2328 2329 vreq->v_dmahdl = dh; 2330 vreq->v_status = VREQ_DMAHDL_ALLOCED; 2331 /*FALLTHRU*/ 2332 2333 case VREQ_DMAHDL_ALLOCED: 2334 /* 2335 * alloc dma handle for 512-byte aligned buf 2336 */ 2337 if (!ALIGNED_XFER(bp)) { 2338 /* 2339 * XXPV: we need to temporarily enlarge the seg 2340 * boundary and s/g length to work round CR6381968 2341 */ 2342 dmaattr = xb_dma_attr; 2343 dmaattr.dma_attr_seg = (uint64_t)-1; 2344 dmaattr.dma_attr_sgllen = INT_MAX; 2345 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 2346 xdf_dmacallback, (caddr_t)vdp, &mdh); 2347 if (rc != DDI_SUCCESS) { 2348 SETDMACBON(vdp); 2349 DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA" 2350 "handle alloc failed\n", 2351 ddi_get_name_addr(vdp->xdf_dip))); 2352 return (DDI_FAILURE); 2353 } 2354 vreq->v_memdmahdl = mdh; 2355 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED; 2356 } 2357 /*FALLTHRU*/ 2358 2359 case VREQ_MEMDMAHDL_ALLOCED: 2360 /* 2361 * alloc 512-byte aligned buf 2362 */ 2363 if (!ALIGNED_XFER(bp)) { 2364 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 2365 bp_mapin(bp); 2366 2367 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 2368 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 2369 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 2370 &aba, &bufsz, &abh); 2371 if (rc != DDI_SUCCESS) { 2372 SETDMACBON(vdp); 2373 DPRINTF(DMA_DBG, ( 2374 "xdf@%s: DMA mem allocation failed\n", 2375 ddi_get_name_addr(vdp->xdf_dip))); 2376 return (DDI_FAILURE); 2377 } 2378 2379 vreq->v_abuf = aba; 2380 vreq->v_align = abh; 2381 vreq->v_status = VREQ_DMAMEM_ALLOCED; 2382 2383 ASSERT(bufsz >= bp->b_bcount); 2384 if (!IS_READ(bp)) 2385 bcopy(bp->b_un.b_addr, vreq->v_abuf, 2386 bp->b_bcount); 2387 } 2388 /*FALLTHRU*/ 2389 2390 case VREQ_DMAMEM_ALLOCED: 2391 /* 2392 * dma bind 2393 */ 2394 if (ALIGNED_XFER(bp)) { 2395 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 2396 dma_flags, xdf_dmacallback, (caddr_t)vdp, 2397 &dc, &ndcs); 2398 } else { 2399 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 2400 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 2401 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 2402 } 2403 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 2404 /* get num of dma windows */ 2405 if (rc == DDI_DMA_PARTIAL_MAP) { 2406 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 2407 ASSERT(rc == DDI_SUCCESS); 2408 } else { 2409 ndws = 1; 2410 } 2411 } else { 2412 SETDMACBON(vdp); 2413 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 2414 ddi_get_name_addr(vdp->xdf_dip))); 2415 return (DDI_FAILURE); 2416 } 2417 2418 vreq->v_dmac = dc; 2419 vreq->v_dmaw = 0; 2420 vreq->v_ndmacs = ndcs; 2421 vreq->v_ndmaws = ndws; 2422 vreq->v_nslots = ndws; 2423 vreq->v_status = VREQ_DMABUF_BOUND; 2424 /*FALLTHRU*/ 2425 2426 case VREQ_DMABUF_BOUND: 2427 /* 2428 * get ge_slot, callback is set upon failure from gs_get(), 2429 * if not set previously 2430 */ 2431 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 2432 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 2433 ddi_get_name_addr(vdp->xdf_dip))); 2434 return (DDI_FAILURE); 2435 } 2436 2437 vreq->v_gs = gs; 2438 gs->vreq = vreq; 2439 vreq->v_status = VREQ_GS_ALLOCED; 2440 break; 2441 2442 case VREQ_GS_ALLOCED: 2443 /* nothing need to be done */ 2444 break; 2445 2446 case VREQ_DMAWIN_DONE: 2447 /* 2448 * move to the next dma window 2449 */ 2450 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 2451 2452 /* get a ge_slot for this DMA window */ 2453 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 2454 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 2455 ddi_get_name_addr(vdp->xdf_dip))); 2456 return (DDI_FAILURE); 2457 } 2458 2459 vreq->v_gs = gs; 2460 gs->vreq = vreq; 2461 vreq->v_dmaw++; 2462 rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 2463 &vreq->v_dmac, &vreq->v_ndmacs); 2464 ASSERT(rc == DDI_SUCCESS); 2465 vreq->v_status = VREQ_GS_ALLOCED; 2466 break; 2467 2468 default: 2469 return (DDI_FAILURE); 2470 } 2471 2472 return (DDI_SUCCESS); 2473 } 2474 2475 static ge_slot_t * 2476 gs_get(xdf_t *vdp, int isread) 2477 { 2478 grant_ref_t gh; 2479 ge_slot_t *gs; 2480 2481 /* try to alloc GTEs needed in this slot, first */ 2482 if (gnttab_alloc_grant_references( 2483 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 2484 if (vdp->xdf_gnt_callback.next == NULL) { 2485 SETDMACBON(vdp); 2486 gnttab_request_free_callback( 2487 &vdp->xdf_gnt_callback, 2488 (void (*)(void *))xdf_dmacallback, 2489 (void *)vdp, 2490 BLKIF_MAX_SEGMENTS_PER_REQUEST); 2491 } 2492 return (NULL); 2493 } 2494 2495 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 2496 if (gs == NULL) { 2497 gnttab_free_grant_references(gh); 2498 if (vdp->xdf_timeout_id == 0) 2499 /* restart I/O after one second */ 2500 vdp->xdf_timeout_id = 2501 timeout(xdf_timeout_handler, vdp, hz); 2502 return (NULL); 2503 } 2504 2505 /* init gs_slot */ 2506 list_insert_head(&vdp->xdf_gs_act, (void *)gs); 2507 gs->oeid = vdp->xdf_peer; 2508 gs->isread = isread; 2509 gs->ghead = gh; 2510 gs->ngrefs = 0; 2511 2512 return (gs); 2513 } 2514 2515 static void 2516 gs_free(xdf_t *vdp, ge_slot_t *gs) 2517 { 2518 int i; 2519 grant_ref_t *gp = gs->ge; 2520 int ngrefs = gs->ngrefs; 2521 boolean_t isread = gs->isread; 2522 2523 list_remove(&vdp->xdf_gs_act, (void *)gs); 2524 2525 /* release all grant table entry resources used in this slot */ 2526 for (i = 0; i < ngrefs; i++, gp++) 2527 gnttab_end_foreign_access(*gp, !isread, 0); 2528 gnttab_free_grant_references(gs->ghead); 2529 2530 kmem_cache_free(xdf_gs_cache, (void *)gs); 2531 } 2532 2533 static grant_ref_t 2534 gs_grant(ge_slot_t *gs, mfn_t mfn) 2535 { 2536 grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead); 2537 2538 ASSERT(gr != -1); 2539 ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); 2540 gs->ge[gs->ngrefs++] = gr; 2541 gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread); 2542 2543 return (gr); 2544 } 2545 2546 static void 2547 unexpectedie(xdf_t *vdp) 2548 { 2549 /* clean up I/Os in ring that have responses */ 2550 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 2551 mutex_exit(&vdp->xdf_dev_lk); 2552 (void) xdf_intr((caddr_t)vdp); 2553 mutex_enter(&vdp->xdf_dev_lk); 2554 } 2555 2556 /* free up all grant table entries */ 2557 while (!list_is_empty(&vdp->xdf_gs_act)) 2558 gs_free(vdp, list_head(&vdp->xdf_gs_act)); 2559 2560 /* 2561 * move bp back to active list orderly 2562 * vreq_busy is updated in vreq_free() 2563 */ 2564 while (!list_is_empty(&vdp->xdf_vreq_act)) { 2565 v_req_t *vreq = list_head(&vdp->xdf_vreq_act); 2566 buf_t *bp = vreq->v_buf; 2567 2568 bp->av_back = NULL; 2569 bp->b_resid = bp->b_bcount; 2570 if (vdp->xdf_f_act == NULL) { 2571 vdp->xdf_f_act = vdp->xdf_l_act = bp; 2572 } else { 2573 /* move to the head of list */ 2574 bp->av_forw = vdp->xdf_f_act; 2575 vdp->xdf_f_act = bp; 2576 } 2577 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 2578 vreq_free(vdp, vreq); 2579 } 2580 } 2581 2582 static void 2583 xdfmin(struct buf *bp) 2584 { 2585 if (bp->b_bcount > xdf_maxphys) 2586 bp->b_bcount = xdf_maxphys; 2587 } 2588