1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * xdf.c - Xen Virtual Block Device Driver 29 * TODO: 30 * - support alternate block size (currently only DEV_BSIZE supported) 31 * - revalidate geometry for removable devices 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/conf.h> 39 #include <sys/cmlb.h> 40 #include <sys/dkio.h> 41 #include <sys/promif.h> 42 #include <sys/sysmacros.h> 43 #include <sys/kstat.h> 44 #include <sys/mach_mmu.h> 45 #ifdef XPV_HVM_DRIVER 46 #include <sys/xpv_support.h> 47 #include <sys/sunndi.h> 48 #endif /* XPV_HVM_DRIVER */ 49 #include <public/io/xenbus.h> 50 #include <xen/sys/xenbus_impl.h> 51 #include <xen/sys/xendev.h> 52 #include <sys/gnttab.h> 53 #include <sys/scsi/generic/inquiry.h> 54 #include <xen/io/blkif_impl.h> 55 #include <io/xdf.h> 56 57 #define FLUSH_DISKCACHE 0x1 58 #define WRITE_BARRIER 0x2 59 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 60 #define USE_WRITE_BARRIER(vdp) \ 61 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 62 #define USE_FLUSH_DISKCACHE(vdp) \ 63 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 64 #define IS_WRITE_BARRIER(vdp, bp) \ 65 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 66 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 67 #define IS_FLUSH_DISKCACHE(bp) \ 68 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 69 70 static void *vbd_ss; 71 static kmem_cache_t *xdf_vreq_cache; 72 static kmem_cache_t *xdf_gs_cache; 73 static int xdf_maxphys = XB_MAXPHYS; 74 int xdfdebug = 0; 75 extern int do_polled_io; 76 diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 77 int xdf_barrier_flush_disable = 0; 78 79 /* 80 * dev_ops and cb_ops entrypoints 81 */ 82 static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 83 static int xdf_attach(dev_info_t *, ddi_attach_cmd_t); 84 static int xdf_detach(dev_info_t *, ddi_detach_cmd_t); 85 static int xdf_reset(dev_info_t *, ddi_reset_cmd_t); 86 static int xdf_open(dev_t *, int, int, cred_t *); 87 static int xdf_close(dev_t, int, int, struct cred *); 88 static int xdf_strategy(struct buf *); 89 static int xdf_read(dev_t, struct uio *, cred_t *); 90 static int xdf_aread(dev_t, struct aio_req *, cred_t *); 91 static int xdf_write(dev_t, struct uio *, cred_t *); 92 static int xdf_awrite(dev_t, struct aio_req *, cred_t *); 93 static int xdf_dump(dev_t, caddr_t, daddr_t, int); 94 static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 95 static uint_t xdf_intr(caddr_t); 96 static int xdf_prop_op(dev_t, dev_info_t *, 
ddi_prop_op_t, int, char *, 97 caddr_t, int *); 98 99 /* 100 * misc private functions 101 */ 102 static int xdf_suspend(dev_info_t *); 103 static int xdf_resume(dev_info_t *); 104 static int xdf_start_connect(xdf_t *); 105 static int xdf_start_disconnect(xdf_t *); 106 static int xdf_post_connect(xdf_t *); 107 static void xdf_post_disconnect(xdf_t *); 108 static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); 109 static void xdf_iostart(xdf_t *); 110 static void xdf_iofini(xdf_t *, uint64_t, int); 111 static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *); 112 static int xdf_drain_io(xdf_t *); 113 static boolean_t xdf_isopen(xdf_t *, int); 114 static int xdf_check_state_transition(xdf_t *, XenbusState); 115 static int xdf_connect(xdf_t *, boolean_t); 116 static int xdf_dmacallback(caddr_t); 117 static void xdf_timeout_handler(void *); 118 static uint_t xdf_iorestart(caddr_t); 119 static v_req_t *vreq_get(xdf_t *, buf_t *); 120 static void vreq_free(xdf_t *, v_req_t *); 121 static int vreq_setup(xdf_t *, v_req_t *); 122 static ge_slot_t *gs_get(xdf_t *, int); 123 static void gs_free(xdf_t *, ge_slot_t *); 124 static grant_ref_t gs_grant(ge_slot_t *, mfn_t); 125 static void unexpectedie(xdf_t *); 126 static void xdfmin(struct buf *); 127 static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *); 128 extern int xdf_kstat_create(dev_info_t *, char *, int); 129 extern void xdf_kstat_delete(dev_info_t *); 130 131 #if defined(XPV_HVM_DRIVER) 132 static void xdf_hvm_add(dev_info_t *); 133 static void xdf_hvm_rm(dev_info_t *); 134 static void xdf_hvm_init(void); 135 static void xdf_hvm_fini(void); 136 #endif /* XPV_HVM_DRIVER */ 137 138 static struct cb_ops xdf_cbops = { 139 xdf_open, 140 xdf_close, 141 xdf_strategy, 142 nodev, 143 xdf_dump, 144 xdf_read, 145 xdf_write, 146 xdf_ioctl, 147 nodev, 148 nodev, 149 nodev, 150 nochpoll, 151 xdf_prop_op, 152 NULL, 153 D_MP | D_NEW | D_64BIT, 154 CB_REV, 155 xdf_aread, 156 xdf_awrite 157 }; 158 159 struct dev_ops xdf_devops = { 160 DEVO_REV, /* devo_rev */ 161 0, /* devo_refcnt */ 162 xdf_getinfo, /* devo_getinfo */ 163 nulldev, /* devo_identify */ 164 nulldev, /* devo_probe */ 165 xdf_attach, /* devo_attach */ 166 xdf_detach, /* devo_detach */ 167 xdf_reset, /* devo_reset */ 168 &xdf_cbops, /* devo_cb_ops */ 169 (struct bus_ops *)NULL /* devo_bus_ops */ 170 }; 171 172 static struct modldrv modldrv = { 173 &mod_driverops, /* Type of module. 
This one is a driver */ 174 "virtual block driver %I%", /* short description */ 175 &xdf_devops /* driver specific ops */ 176 }; 177 178 static struct modlinkage xdf_modlinkage = { 179 MODREV_1, (void *)&modldrv, NULL 180 }; 181 182 /* 183 * I/O buffer DMA attributes 184 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 185 */ 186 static ddi_dma_attr_t xb_dma_attr = { 187 DMA_ATTR_V0, 188 (uint64_t)0, /* lowest address */ 189 (uint64_t)0xffffffffffffffff, /* highest usable address */ 190 (uint64_t)0xffffff, /* DMA counter limit max */ 191 (uint64_t)XB_BSIZE, /* alignment in bytes */ 192 XB_BSIZE - 1, /* bitmap of burst sizes */ 193 XB_BSIZE, /* min transfer */ 194 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 195 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 196 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 197 XB_BSIZE, /* granularity */ 198 0, /* flags (reserved) */ 199 }; 200 201 static ddi_device_acc_attr_t xc_acc_attr = { 202 DDI_DEVICE_ATTR_V0, 203 DDI_NEVERSWAP_ACC, 204 DDI_STRICTORDER_ACC 205 }; 206 207 /* callbacks from commmon label */ 208 209 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 210 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 211 212 static cmlb_tg_ops_t xdf_lb_ops = { 213 TG_DK_OPS_VERSION_1, 214 xdf_lb_rdwr, 215 xdf_lb_getinfo 216 }; 217 218 int 219 _init(void) 220 { 221 int rc; 222 223 if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0) 224 return (rc); 225 226 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 227 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 228 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 229 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 230 231 #if defined(XPV_HVM_DRIVER) 232 xdf_hvm_init(); 233 #endif /* XPV_HVM_DRIVER */ 234 235 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 236 #if defined(XPV_HVM_DRIVER) 237 xdf_hvm_fini(); 238 #endif /* XPV_HVM_DRIVER */ 239 kmem_cache_destroy(xdf_vreq_cache); 240 kmem_cache_destroy(xdf_gs_cache); 241 ddi_soft_state_fini(&vbd_ss); 242 return (rc); 243 } 244 245 return (rc); 246 } 247 248 int 249 _fini(void) 250 { 251 252 int err; 253 if ((err = mod_remove(&xdf_modlinkage)) != 0) 254 return (err); 255 256 #if defined(XPV_HVM_DRIVER) 257 xdf_hvm_fini(); 258 #endif /* XPV_HVM_DRIVER */ 259 260 kmem_cache_destroy(xdf_vreq_cache); 261 kmem_cache_destroy(xdf_gs_cache); 262 ddi_soft_state_fini(&vbd_ss); 263 264 return (0); 265 } 266 267 int 268 _info(struct modinfo *modinfop) 269 { 270 return (mod_info(&xdf_modlinkage, modinfop)); 271 } 272 273 /*ARGSUSED*/ 274 static int 275 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 276 { 277 int instance; 278 xdf_t *vbdp; 279 280 instance = XDF_INST(getminor((dev_t)arg)); 281 282 switch (cmd) { 283 case DDI_INFO_DEVT2DEVINFO: 284 if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { 285 *rp = NULL; 286 return (DDI_FAILURE); 287 } 288 *rp = vbdp->xdf_dip; 289 return (DDI_SUCCESS); 290 291 case DDI_INFO_DEVT2INSTANCE: 292 *rp = (void *)(uintptr_t)instance; 293 return (DDI_SUCCESS); 294 295 default: 296 return (DDI_FAILURE); 297 } 298 } 299 300 static int 301 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 302 char *name, caddr_t valuep, int *lengthp) 303 { 304 int instance = ddi_get_instance(dip); 305 xdf_t *vdp; 306 diskaddr_t p_blkcnt; 307 308 /* 309 * xdf dynamic properties are device specific and size oriented. 
310 * Requests issued under conditions where size is valid are passed 311 * to ddi_prop_op_nblocks with the size information, otherwise the 312 * request is passed to ddi_prop_op. 313 */ 314 vdp = ddi_get_soft_state(vbd_ss, instance); 315 316 if ((dev == DDI_DEV_T_ANY) || (vdp == NULL)) 317 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 318 name, valuep, lengthp)); 319 320 /* do cv_wait until connected or failed */ 321 mutex_enter(&vdp->xdf_dev_lk); 322 if (xdf_connect(vdp, B_TRUE) != XD_READY) { 323 mutex_exit(&vdp->xdf_dev_lk); 324 goto out; 325 } 326 mutex_exit(&vdp->xdf_dev_lk); 327 328 if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt, 329 NULL, NULL, NULL, NULL) == 0) 330 return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 331 name, valuep, lengthp, (uint64_t)p_blkcnt)); 332 333 out: 334 return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep, 335 lengthp)); 336 } 337 338 static int 339 xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 340 { 341 xdf_t *vdp; 342 ddi_iblock_cookie_t softibc; 343 int instance; 344 345 xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, 346 "xdfdebug", 0); 347 348 switch (cmd) { 349 case DDI_ATTACH: 350 break; 351 352 case DDI_RESUME: 353 return (xdf_resume(devi)); 354 355 default: 356 return (DDI_FAILURE); 357 } 358 359 instance = ddi_get_instance(devi); 360 if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) 361 return (DDI_FAILURE); 362 363 DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); 364 vdp = ddi_get_soft_state(vbd_ss, instance); 365 ddi_set_driver_private(devi, vdp); 366 vdp->xdf_dip = devi; 367 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 368 369 if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) { 370 cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", 371 ddi_get_name_addr(devi)); 372 goto errout0; 373 } 374 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); 375 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); 376 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, 377 (void *)vdp->xdf_ibc); 378 379 if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) 380 != DDI_SUCCESS) { 381 cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", 382 ddi_get_name_addr(devi)); 383 goto errout0; 384 } 385 if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 386 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 387 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 388 ddi_get_name_addr(devi)); 389 goto errout0; 390 } 391 392 #if !defined(XPV_HVM_DRIVER) 393 /* create kstat for iostat(1M) */ 394 if (xdf_kstat_create(devi, "xdf", instance) != 0) { 395 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 396 ddi_get_name_addr(devi)); 397 goto errout0; 398 } 399 #endif /* !XPV_HVM_DRIVER */ 400 401 /* driver handles kernel-issued IOCTLs */ 402 if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, 403 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 404 cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", 405 ddi_get_name_addr(devi)); 406 goto errout0; 407 } 408 409 /* 410 * Initialize the physical geometry stucture. Note that currently 411 * we don't know the size of the backend device so the number 412 * of blocks on the device will be initialized to zero. Once 413 * we connect to the backend device we'll update the physical 414 * geometry to reflect the real size of the device. 
415 */ 416 xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); 417 418 /* 419 * create default device minor nodes: non-removable disk 420 * we will adjust minor nodes after we are connected w/ backend 421 */ 422 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 423 if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, 424 DDI_NT_BLOCK_XVMD, 425 #if defined(XPV_HVM_DRIVER) 426 CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | 427 CMLB_INTERNAL_MINOR_NODES, 428 #else /* !XPV_HVM_DRIVER */ 429 CMLB_FAKE_LABEL_ONE_PARTITION, 430 #endif /* !XPV_HVM_DRIVER */ 431 vdp->xdf_vd_lbl, NULL) != 0) { 432 cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", 433 ddi_get_name_addr(devi)); 434 goto errout0; 435 } 436 437 /* 438 * We ship with cache-enabled disks 439 */ 440 vdp->xdf_wce = 1; 441 442 mutex_enter(&vdp->xdf_cb_lk); 443 444 /* Watch backend XenbusState change */ 445 if (xvdi_add_event_handler(devi, XS_OE_STATE, 446 xdf_oe_change) != DDI_SUCCESS) { 447 mutex_exit(&vdp->xdf_cb_lk); 448 goto errout0; 449 } 450 451 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 452 cmn_err(CE_WARN, "xdf@%s: start connection failed", 453 ddi_get_name_addr(devi)); 454 (void) xdf_start_disconnect(vdp); 455 mutex_exit(&vdp->xdf_cb_lk); 456 goto errout1; 457 } 458 459 mutex_exit(&vdp->xdf_cb_lk); 460 461 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 462 offsetof(v_req_t, v_link)); 463 list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), 464 offsetof(ge_slot_t, link)); 465 466 #if defined(XPV_HVM_DRIVER) 467 xdf_hvm_add(devi); 468 469 (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1); 470 471 /* 472 * Report our version to dom0. 473 */ 474 if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", 475 HVMPV_XDF_VERS)) 476 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 477 #endif /* XPV_HVM_DRIVER */ 478 479 ddi_report_dev(devi); 480 481 DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); 482 483 return (DDI_SUCCESS); 484 485 errout1: 486 xvdi_remove_event_handler(devi, XS_OE_STATE); 487 errout0: 488 if (vdp->xdf_vd_lbl != NULL) { 489 cmlb_detach(vdp->xdf_vd_lbl, NULL); 490 cmlb_free_handle(&vdp->xdf_vd_lbl); 491 } 492 #if !defined(XPV_HVM_DRIVER) 493 xdf_kstat_delete(devi); 494 #endif /* !XPV_HVM_DRIVER */ 495 if (vdp->xdf_softintr_id != NULL) 496 ddi_remove_softintr(vdp->xdf_softintr_id); 497 if (vdp->xdf_ibc != NULL) { 498 mutex_destroy(&vdp->xdf_cb_lk); 499 mutex_destroy(&vdp->xdf_dev_lk); 500 } 501 cv_destroy(&vdp->xdf_dev_cv); 502 ddi_soft_state_free(vbd_ss, instance); 503 ddi_set_driver_private(devi, NULL); 504 ddi_prop_remove_all(devi); 505 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); 506 return (DDI_FAILURE); 507 } 508 509 static int 510 xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 511 { 512 xdf_t *vdp; 513 int instance; 514 515 switch (cmd) { 516 517 case DDI_PM_SUSPEND: 518 break; 519 520 case DDI_SUSPEND: 521 return (xdf_suspend(devi)); 522 523 case DDI_DETACH: 524 break; 525 526 default: 527 return (DDI_FAILURE); 528 } 529 530 instance = ddi_get_instance(devi); 531 DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); 532 vdp = ddi_get_soft_state(vbd_ss, instance); 533 534 if (vdp == NULL) 535 return (DDI_FAILURE); 536 537 mutex_enter(&vdp->xdf_dev_lk); 538 if (xdf_isopen(vdp, -1)) { 539 mutex_exit(&vdp->xdf_dev_lk); 540 return (DDI_FAILURE); 541 } 542 543 if (vdp->xdf_status != XD_CLOSED) { 544 mutex_exit(&vdp->xdf_dev_lk); 545 return (DDI_FAILURE); 546 } 547 548 #if defined(XPV_HVM_DRIVER) 549 xdf_hvm_rm(devi); 550 #endif /* XPV_HVM_DRIVER */ 551 552 ASSERT(!ISDMACBON(vdp)); 553 
mutex_exit(&vdp->xdf_dev_lk); 554 555 if (vdp->xdf_timeout_id != 0) 556 (void) untimeout(vdp->xdf_timeout_id); 557 558 xvdi_remove_event_handler(devi, XS_OE_STATE); 559 560 /* we'll support backend running in domU later */ 561 #ifdef DOMU_BACKEND 562 (void) xvdi_post_event(devi, XEN_HP_REMOVE); 563 #endif 564 565 list_destroy(&vdp->xdf_vreq_act); 566 list_destroy(&vdp->xdf_gs_act); 567 ddi_prop_remove_all(devi); 568 xdf_kstat_delete(devi); 569 ddi_remove_softintr(vdp->xdf_softintr_id); 570 ddi_set_driver_private(devi, NULL); 571 cv_destroy(&vdp->xdf_dev_cv); 572 mutex_destroy(&vdp->xdf_cb_lk); 573 mutex_destroy(&vdp->xdf_dev_lk); 574 if (vdp->xdf_cache_flush_block != NULL) 575 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 576 ddi_soft_state_free(vbd_ss, instance); 577 return (DDI_SUCCESS); 578 } 579 580 static int 581 xdf_suspend(dev_info_t *devi) 582 { 583 xdf_t *vdp; 584 int instance; 585 enum xdf_state st; 586 587 instance = ddi_get_instance(devi); 588 589 if (xdfdebug & SUSRES_DBG) 590 xen_printf("xdf_suspend: xdf#%d\n", instance); 591 592 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 593 return (DDI_FAILURE); 594 595 xvdi_suspend(devi); 596 597 mutex_enter(&vdp->xdf_cb_lk); 598 mutex_enter(&vdp->xdf_dev_lk); 599 st = vdp->xdf_status; 600 /* change status to stop further I/O requests */ 601 if (st == XD_READY) 602 vdp->xdf_status = XD_SUSPEND; 603 mutex_exit(&vdp->xdf_dev_lk); 604 mutex_exit(&vdp->xdf_cb_lk); 605 606 /* make sure no more I/O responses left in the ring buffer */ 607 if ((st == XD_INIT) || (st == XD_READY)) { 608 #ifdef XPV_HVM_DRIVER 609 ec_unbind_evtchn(vdp->xdf_evtchn); 610 xvdi_free_evtchn(devi); 611 #else /* !XPV_HVM_DRIVER */ 612 (void) ddi_remove_intr(devi, 0, NULL); 613 #endif /* !XPV_HVM_DRIVER */ 614 (void) xdf_drain_io(vdp); 615 /* 616 * no need to teardown the ring buffer here 617 * it will be simply re-init'ed during resume when 618 * we call xvdi_alloc_ring 619 */ 620 } 621 622 if (xdfdebug & SUSRES_DBG) 623 xen_printf("xdf_suspend: SUCCESS\n"); 624 625 return (DDI_SUCCESS); 626 } 627 628 /*ARGSUSED*/ 629 static int 630 xdf_resume(dev_info_t *devi) 631 { 632 xdf_t *vdp; 633 int instance; 634 635 instance = ddi_get_instance(devi); 636 if (xdfdebug & SUSRES_DBG) 637 xen_printf("xdf_resume: xdf%d\n", instance); 638 639 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 640 return (DDI_FAILURE); 641 642 mutex_enter(&vdp->xdf_cb_lk); 643 644 if (xvdi_resume(devi) != DDI_SUCCESS) { 645 mutex_exit(&vdp->xdf_cb_lk); 646 return (DDI_FAILURE); 647 } 648 649 mutex_enter(&vdp->xdf_dev_lk); 650 ASSERT(vdp->xdf_status != XD_READY); 651 vdp->xdf_status = XD_UNKNOWN; 652 mutex_exit(&vdp->xdf_dev_lk); 653 654 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 655 mutex_exit(&vdp->xdf_cb_lk); 656 return (DDI_FAILURE); 657 } 658 659 mutex_exit(&vdp->xdf_cb_lk); 660 661 if (xdfdebug & SUSRES_DBG) 662 xen_printf("xdf_resume: done\n"); 663 return (DDI_SUCCESS); 664 } 665 666 /*ARGSUSED*/ 667 static int 668 xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 669 { 670 xdf_t *vdp; 671 int instance; 672 673 instance = ddi_get_instance(devi); 674 DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); 675 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 676 return (DDI_FAILURE); 677 678 /* 679 * wait for any outstanding I/O to complete 680 */ 681 (void) xdf_drain_io(vdp); 682 683 DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); 684 return (DDI_SUCCESS); 685 } 686 687 static int 688 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 689 { 690 minor_t 
minor; 691 xdf_t *vdp; 692 int part; 693 ulong_t parbit; 694 diskaddr_t p_blkct = 0; 695 boolean_t firstopen; 696 boolean_t nodelay; 697 698 minor = getminor(*devp); 699 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 700 return (ENXIO); 701 702 nodelay = (flag & (FNDELAY | FNONBLOCK)); 703 704 DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); 705 706 /* do cv_wait until connected or failed */ 707 mutex_enter(&vdp->xdf_dev_lk); 708 if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { 709 mutex_exit(&vdp->xdf_dev_lk); 710 return (ENXIO); 711 } 712 713 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 714 mutex_exit(&vdp->xdf_dev_lk); 715 return (EROFS); 716 } 717 718 part = XDF_PART(minor); 719 parbit = 1 << part; 720 if ((vdp->xdf_vd_exclopen & parbit) || 721 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 722 mutex_exit(&vdp->xdf_dev_lk); 723 return (EBUSY); 724 } 725 726 /* are we the first one to open this node? */ 727 firstopen = !xdf_isopen(vdp, -1); 728 729 if (otyp == OTYP_LYR) 730 vdp->xdf_vd_lyropen[part]++; 731 732 vdp->xdf_vd_open[otyp] |= parbit; 733 734 if (flag & FEXCL) 735 vdp->xdf_vd_exclopen |= parbit; 736 737 mutex_exit(&vdp->xdf_dev_lk); 738 739 /* force a re-validation */ 740 if (firstopen) 741 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 742 743 /* 744 * check size 745 * ignore CD/DVD which contains a zero-sized s0 746 */ 747 if (!nodelay && !XD_IS_CD(vdp) && 748 ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 749 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { 750 (void) xdf_close(*devp, flag, otyp, credp); 751 return (ENXIO); 752 } 753 754 return (0); 755 } 756 757 /*ARGSUSED*/ 758 static int 759 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 760 { 761 minor_t minor; 762 xdf_t *vdp; 763 int part; 764 ulong_t parbit; 765 766 minor = getminor(dev); 767 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 768 return (ENXIO); 769 770 mutex_enter(&vdp->xdf_dev_lk); 771 part = XDF_PART(minor); 772 if (!xdf_isopen(vdp, part)) { 773 mutex_exit(&vdp->xdf_dev_lk); 774 return (ENXIO); 775 } 776 parbit = 1 << part; 777 778 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 779 if (otyp == OTYP_LYR) { 780 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 781 if (--vdp->xdf_vd_lyropen[part] == 0) 782 vdp->xdf_vd_open[otyp] &= ~parbit; 783 } else { 784 vdp->xdf_vd_open[otyp] &= ~parbit; 785 } 786 vdp->xdf_vd_exclopen &= ~parbit; 787 788 mutex_exit(&vdp->xdf_dev_lk); 789 return (0); 790 } 791 792 static int 793 xdf_strategy(struct buf *bp) 794 { 795 xdf_t *vdp; 796 minor_t minor; 797 diskaddr_t p_blkct, p_blkst; 798 ulong_t nblks; 799 int part; 800 801 minor = getminor(bp->b_edev); 802 part = XDF_PART(minor); 803 804 vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)); 805 if ((vdp == NULL) || !xdf_isopen(vdp, part)) { 806 bioerror(bp, ENXIO); 807 bp->b_resid = bp->b_bcount; 808 biodone(bp); 809 return (0); 810 } 811 812 /* Check for writes to a read only device */ 813 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 814 bioerror(bp, EROFS); 815 bp->b_resid = bp->b_bcount; 816 biodone(bp); 817 return (0); 818 } 819 820 /* Check if this I/O is accessing a partition or the entire disk */ 821 if ((long)bp->b_private == XB_SLICE_NONE) { 822 /* This I/O is using an absolute offset */ 823 p_blkct = vdp->xdf_xdev_nblocks; 824 p_blkst = 0; 825 } else { 826 /* This I/O is using a partition relative offset */ 827 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 828 &p_blkst, NULL, NULL, NULL)) { 829 bioerror(bp, ENXIO); 830 bp->b_resid = bp->b_bcount; 831 biodone(bp); 832 
return (0); 833 } 834 } 835 836 /* check for a starting block beyond the disk or partition limit */ 837 if (bp->b_blkno > p_blkct) { 838 DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, 839 (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 840 bioerror(bp, EINVAL); 841 bp->b_resid = bp->b_bcount; 842 biodone(bp); 843 return (0); 844 } 845 846 /* Legacy: don't set error flag at this case */ 847 if (bp->b_blkno == p_blkct) { 848 bp->b_resid = bp->b_bcount; 849 biodone(bp); 850 return (0); 851 } 852 853 /* Adjust for partial transfer */ 854 nblks = bp->b_bcount >> XB_BSHIFT; 855 if ((bp->b_blkno + nblks) > p_blkct) { 856 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 857 bp->b_bcount -= bp->b_resid; 858 } 859 860 DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", 861 (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 862 863 /* Fix up the buf struct */ 864 bp->b_flags |= B_BUSY; 865 bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */ 866 bp->b_private = (void *)(uintptr_t)p_blkst; 867 868 mutex_enter(&vdp->xdf_dev_lk); 869 if (vdp->xdf_xdev_iostat != NULL) 870 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 871 if (vdp->xdf_f_act == NULL) { 872 vdp->xdf_f_act = vdp->xdf_l_act = bp; 873 } else { 874 vdp->xdf_l_act->av_forw = bp; 875 vdp->xdf_l_act = bp; 876 } 877 mutex_exit(&vdp->xdf_dev_lk); 878 879 xdf_iostart(vdp); 880 if (do_polled_io) 881 (void) xdf_drain_io(vdp); 882 return (0); 883 } 884 885 /*ARGSUSED*/ 886 static int 887 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 888 { 889 890 xdf_t *vdp; 891 minor_t minor; 892 diskaddr_t p_blkcnt; 893 int part; 894 895 minor = getminor(dev); 896 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 897 return (ENXIO); 898 899 DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", 900 (int64_t)uiop->uio_offset)); 901 902 part = XDF_PART(minor); 903 if (!xdf_isopen(vdp, part)) 904 return (ENXIO); 905 906 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 907 NULL, NULL, NULL, NULL)) 908 return (ENXIO); 909 910 if (U_INVAL(uiop)) 911 return (EINVAL); 912 913 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 914 } 915 916 /*ARGSUSED*/ 917 static int 918 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 919 { 920 xdf_t *vdp; 921 minor_t minor; 922 diskaddr_t p_blkcnt; 923 int part; 924 925 minor = getminor(dev); 926 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 927 return (ENXIO); 928 929 DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", 930 (int64_t)uiop->uio_offset)); 931 932 part = XDF_PART(minor); 933 if (!xdf_isopen(vdp, part)) 934 return (ENXIO); 935 936 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 937 NULL, NULL, NULL, NULL)) 938 return (ENXIO); 939 940 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 941 return (ENOSPC); 942 943 if (U_INVAL(uiop)) 944 return (EINVAL); 945 946 return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); 947 } 948 949 /*ARGSUSED*/ 950 static int 951 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 952 { 953 xdf_t *vdp; 954 minor_t minor; 955 struct uio *uiop = aiop->aio_uio; 956 diskaddr_t p_blkcnt; 957 int part; 958 959 minor = getminor(dev); 960 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 961 return (ENXIO); 962 963 part = XDF_PART(minor); 964 if (!xdf_isopen(vdp, part)) 965 return (ENXIO); 966 967 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 968 NULL, NULL, NULL, NULL)) 969 return (ENXIO); 970 971 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 972 return (ENOSPC); 973 974 if 
(U_INVAL(uiop)) 975 return (EINVAL); 976 977 return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); 978 } 979 980 /*ARGSUSED*/ 981 static int 982 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 983 { 984 xdf_t *vdp; 985 minor_t minor; 986 struct uio *uiop = aiop->aio_uio; 987 diskaddr_t p_blkcnt; 988 int part; 989 990 minor = getminor(dev); 991 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 992 return (ENXIO); 993 994 part = XDF_PART(minor); 995 if (!xdf_isopen(vdp, part)) 996 return (ENXIO); 997 998 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 999 NULL, NULL, NULL, NULL)) 1000 return (ENXIO); 1001 1002 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 1003 return (ENOSPC); 1004 1005 if (U_INVAL(uiop)) 1006 return (EINVAL); 1007 1008 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); 1009 } 1010 1011 static int 1012 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1013 { 1014 struct buf dumpbuf, *dbp; 1015 xdf_t *vdp; 1016 minor_t minor; 1017 int err = 0; 1018 int part; 1019 diskaddr_t p_blkcnt, p_blkst; 1020 1021 minor = getminor(dev); 1022 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 1023 return (ENXIO); 1024 1025 DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", 1026 addr, blkno, nblk)); 1027 1028 part = XDF_PART(minor); 1029 if (!xdf_isopen(vdp, part)) 1030 return (ENXIO); 1031 1032 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 1033 NULL, NULL, NULL)) 1034 return (ENXIO); 1035 1036 if ((blkno + nblk) > p_blkcnt) { 1037 cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, 1038 blkno + nblk, (uint64_t)p_blkcnt); 1039 return (EINVAL); 1040 } 1041 1042 dbp = &dumpbuf; 1043 bioinit(dbp); 1044 dbp->b_flags = B_BUSY; 1045 dbp->b_un.b_addr = addr; 1046 dbp->b_bcount = nblk << DEV_BSHIFT; 1047 dbp->b_blkno = blkno; 1048 dbp->b_edev = dev; 1049 dbp->b_private = (void *)(uintptr_t)p_blkst; 1050 1051 mutex_enter(&vdp->xdf_dev_lk); 1052 if (vdp->xdf_xdev_iostat != NULL) 1053 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1054 if (vdp->xdf_f_act == NULL) { 1055 vdp->xdf_f_act = vdp->xdf_l_act = dbp; 1056 } else { 1057 vdp->xdf_l_act->av_forw = dbp; 1058 vdp->xdf_l_act = dbp; 1059 } 1060 dbp->av_forw = NULL; 1061 dbp->av_back = NULL; 1062 mutex_exit(&vdp->xdf_dev_lk); 1063 xdf_iostart(vdp); 1064 err = xdf_drain_io(vdp); 1065 biofini(dbp); 1066 return (err); 1067 } 1068 1069 /*ARGSUSED*/ 1070 static int 1071 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1072 int *rvalp) 1073 { 1074 int instance; 1075 xdf_t *vdp; 1076 minor_t minor; 1077 int part; 1078 1079 minor = getminor(dev); 1080 instance = XDF_INST(minor); 1081 1082 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 1083 return (ENXIO); 1084 1085 DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", 1086 instance, cmd, cmd)); 1087 1088 part = XDF_PART(minor); 1089 if (!xdf_isopen(vdp, part)) 1090 return (ENXIO); 1091 1092 switch (cmd) { 1093 case DKIOCGMEDIAINFO: { 1094 struct dk_minfo media_info; 1095 1096 media_info.dki_lbsize = DEV_BSIZE; 1097 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 1098 media_info.dki_media_type = DK_FIXED_DISK; 1099 1100 if (ddi_copyout(&media_info, (void *)arg, 1101 sizeof (struct dk_minfo), mode)) { 1102 return (EFAULT); 1103 } else { 1104 return (0); 1105 } 1106 } 1107 1108 case DKIOCINFO: { 1109 struct dk_cinfo info; 1110 1111 /* controller information */ 1112 if (XD_IS_CD(vdp)) 1113 info.dki_ctype = DKC_CDROM; 1114 else 1115 info.dki_ctype 
= DKC_VBD; 1116 1117 info.dki_cnum = 0; 1118 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 1119 1120 /* unit information */ 1121 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 1122 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 1123 info.dki_flags = DKI_FMTVOL; 1124 info.dki_partition = part; 1125 info.dki_maxtransfer = maxphys / DEV_BSIZE; 1126 info.dki_addr = 0; 1127 info.dki_space = 0; 1128 info.dki_prio = 0; 1129 info.dki_vec = 0; 1130 1131 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 1132 return (EFAULT); 1133 else 1134 return (0); 1135 } 1136 1137 case DKIOCSTATE: { 1138 enum dkio_state dkstate = DKIO_INSERTED; 1139 if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), 1140 mode) != 0) 1141 return (EFAULT); 1142 return (0); 1143 } 1144 1145 /* 1146 * is media removable? 1147 */ 1148 case DKIOCREMOVABLE: { 1149 int i = XD_IS_RM(vdp) ? 1 : 0; 1150 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1151 return (EFAULT); 1152 return (0); 1153 } 1154 1155 case DKIOCG_PHYGEOM: 1156 case DKIOCG_VIRTGEOM: 1157 case DKIOCGGEOM: 1158 case DKIOCSGEOM: 1159 case DKIOCGAPART: 1160 case DKIOCSAPART: 1161 case DKIOCGVTOC: 1162 case DKIOCSVTOC: 1163 case DKIOCPARTINFO: 1164 case DKIOCGMBOOT: 1165 case DKIOCSMBOOT: 1166 case DKIOCGETEFI: 1167 case DKIOCSETEFI: 1168 case DKIOCPARTITION: { 1169 int rc; 1170 1171 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1172 rvalp, NULL); 1173 return (rc); 1174 } 1175 1176 case DKIOCGETWCE: 1177 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1178 sizeof (vdp->xdf_wce), mode)) 1179 return (EFAULT); 1180 return (0); 1181 case DKIOCSETWCE: 1182 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1183 sizeof (vdp->xdf_wce), mode)) 1184 return (EFAULT); 1185 return (0); 1186 case DKIOCFLUSHWRITECACHE: { 1187 int rc; 1188 struct dk_callback *dkc = (struct dk_callback *)arg; 1189 1190 if (vdp->xdf_flush_supported) { 1191 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1192 NULL, 0, 0, (void *)dev); 1193 } else if (vdp->xdf_feature_barrier && 1194 !xdf_barrier_flush_disable) { 1195 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1196 vdp->xdf_cache_flush_block, xdf_flush_block, 1197 DEV_BSIZE, (void *)dev); 1198 } else { 1199 return (ENOTTY); 1200 } 1201 if ((mode & FKIOCTL) && (dkc != NULL) && 1202 (dkc->dkc_callback != NULL)) { 1203 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1204 /* need to return 0 after calling callback */ 1205 rc = 0; 1206 } 1207 return (rc); 1208 } 1209 1210 default: 1211 return (ENOTTY); 1212 } 1213 } 1214 1215 /* 1216 * xdf interrupt handler 1217 */ 1218 static uint_t 1219 xdf_intr(caddr_t arg) 1220 { 1221 xdf_t *vdp = (xdf_t *)arg; 1222 xendev_ring_t *xbr; 1223 blkif_response_t *resp; 1224 int bioerr; 1225 uint64_t id; 1226 extern int do_polled_io; 1227 uint8_t op; 1228 uint16_t status; 1229 ddi_acc_handle_t acchdl; 1230 1231 mutex_enter(&vdp->xdf_dev_lk); 1232 1233 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1234 mutex_exit(&vdp->xdf_dev_lk); 1235 return (DDI_INTR_UNCLAIMED); 1236 } 1237 1238 acchdl = vdp->xdf_xb_ring_hdl; 1239 1240 /* 1241 * complete all requests which have a response 1242 */ 1243 while (resp = xvdi_ring_get_response(xbr)) { 1244 id = ddi_get64(acchdl, &resp->id); 1245 op = ddi_get8(acchdl, &resp->operation); 1246 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1247 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1248 op, id, status)); 1249 1250 /* 1251 * XXPV - close connection to the backend and restart 1252 */ 1253 if (status != BLKIF_RSP_OKAY) { 1254 DPRINTF(IO_DBG, ("xdf@%s: I/O error 
while %s", 1255 ddi_get_name_addr(vdp->xdf_dip), 1256 (op == BLKIF_OP_READ) ? "reading" : "writing")); 1257 bioerr = EIO; 1258 } else { 1259 bioerr = 0; 1260 } 1261 1262 xdf_iofini(vdp, id, bioerr); 1263 } 1264 1265 mutex_exit(&vdp->xdf_dev_lk); 1266 1267 if (!do_polled_io) 1268 xdf_iostart(vdp); 1269 1270 return (DDI_INTR_CLAIMED); 1271 } 1272 1273 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1274 1275 /* 1276 * Snarf new data if our flush block was re-written 1277 */ 1278 static void 1279 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1280 { 1281 int nblks; 1282 boolean_t mapin; 1283 1284 if (IS_WRITE_BARRIER(vdp, bp)) 1285 return; /* write was a flush write */ 1286 1287 mapin = B_FALSE; 1288 nblks = bp->b_bcount >> DEV_BSHIFT; 1289 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1290 xdf_fbrewrites++; 1291 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1292 mapin = B_TRUE; 1293 bp_mapin(bp); 1294 } 1295 bcopy(bp->b_un.b_addr + 1296 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1297 vdp->xdf_cache_flush_block, DEV_BSIZE); 1298 if (mapin) 1299 bp_mapout(bp); 1300 } 1301 } 1302 1303 static void 1304 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1305 { 1306 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1307 v_req_t *vreq = gs->vreq; 1308 buf_t *bp = vreq->v_buf; 1309 1310 gs_free(vdp, gs); 1311 if (bioerr) 1312 bioerror(bp, bioerr); 1313 vreq->v_nslots--; 1314 if (vreq->v_nslots != 0) 1315 return; 1316 1317 XDF_UPDATE_IO_STAT(vdp, bp); 1318 if (vdp->xdf_xdev_iostat != NULL) 1319 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1320 1321 if (IS_ERROR(bp)) 1322 bp->b_resid = bp->b_bcount; 1323 1324 vreq_free(vdp, vreq); 1325 biodone(bp); 1326 } 1327 1328 /* 1329 * return value of xdf_prepare_rreq() 1330 * used in xdf_iostart() 1331 */ 1332 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1333 #define XF_COMP 1 /* no more I/O left in buf */ 1334 1335 static void 1336 xdf_iostart(xdf_t *vdp) 1337 { 1338 xendev_ring_t *xbr; 1339 struct buf *bp; 1340 blkif_request_t *rreq; 1341 int retval; 1342 int rreqready = 0; 1343 1344 xbr = vdp->xdf_xb_ring; 1345 1346 /* 1347 * populate the ring request(s) 1348 * 1349 * loop until there is no buf to transfer or no free slot 1350 * available in I/O ring 1351 */ 1352 mutex_enter(&vdp->xdf_dev_lk); 1353 1354 for (;;) { 1355 if (vdp->xdf_status != XD_READY) 1356 break; 1357 1358 /* active buf queue empty? 
*/ 1359 if ((bp = vdp->xdf_f_act) == NULL) 1360 break; 1361 1362 /* try to grab a vreq for this bp */ 1363 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1364 break; 1365 /* alloc DMA/GTE resources */ 1366 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1367 break; 1368 1369 /* get next blkif_request in the ring */ 1370 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1371 break; 1372 bzero(rreq, sizeof (blkif_request_t)); 1373 1374 /* populate blkif_request with this buf */ 1375 rreqready++; 1376 retval = xdf_prepare_rreq(vdp, bp, rreq); 1377 if (retval == XF_COMP) { 1378 /* finish this bp, switch to next one */ 1379 if (vdp->xdf_xdev_iostat != NULL) 1380 kstat_waitq_to_runq( 1381 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1382 vdp->xdf_f_act = bp->av_forw; 1383 bp->av_forw = NULL; 1384 } 1385 } 1386 1387 /* 1388 * Send the request(s) to the backend 1389 */ 1390 if (rreqready) { 1391 if (xvdi_ring_push_request(xbr)) { 1392 DPRINTF(IO_DBG, ("xdf_iostart: " 1393 "sent request(s) to backend\n")); 1394 xvdi_notify_oe(vdp->xdf_dip); 1395 } 1396 } 1397 1398 mutex_exit(&vdp->xdf_dev_lk); 1399 } 1400 1401 /* 1402 * populate a single blkif_request_t w/ a buf 1403 */ 1404 static int 1405 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1406 { 1407 int rval; 1408 grant_ref_t gr; 1409 uint8_t fsect, lsect; 1410 size_t bcnt; 1411 paddr_t dma_addr; 1412 off_t blk_off; 1413 dev_info_t *dip = vdp->xdf_dip; 1414 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1415 v_req_t *vreq = BP2VREQ(bp); 1416 uint64_t blkno = vreq->v_blkno; 1417 uint_t ndmacs = vreq->v_ndmacs; 1418 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1419 int seg = 0; 1420 int isread = IS_READ(bp); 1421 1422 if (isread) 1423 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1424 else { 1425 switch (vreq->v_flush_diskcache) { 1426 case FLUSH_DISKCACHE: 1427 ddi_put8(acchdl, &rreq->operation, 1428 BLKIF_OP_FLUSH_DISKCACHE); 1429 ddi_put16(acchdl, &rreq->handle, vdev); 1430 ddi_put64(acchdl, &rreq->id, 1431 (uint64_t)(uintptr_t)(vreq->v_gs)); 1432 ddi_put8(acchdl, &rreq->nr_segments, 0); 1433 return (XF_COMP); 1434 case WRITE_BARRIER: 1435 ddi_put8(acchdl, &rreq->operation, 1436 BLKIF_OP_WRITE_BARRIER); 1437 break; 1438 default: 1439 if (!vdp->xdf_wce) 1440 ddi_put8(acchdl, &rreq->operation, 1441 BLKIF_OP_WRITE_BARRIER); 1442 else 1443 ddi_put8(acchdl, &rreq->operation, 1444 BLKIF_OP_WRITE); 1445 break; 1446 } 1447 } 1448 1449 ddi_put16(acchdl, &rreq->handle, vdev); 1450 ddi_put64(acchdl, &rreq->sector_number, blkno); 1451 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1452 1453 /* 1454 * loop until all segments are populated or no more dma cookie in buf 1455 */ 1456 for (;;) { 1457 /* 1458 * Each segment of a blkif request can transfer up to 1459 * one 4K page of data. 
1460 */ 1461 bcnt = vreq->v_dmac.dmac_size; 1462 ASSERT(bcnt <= PAGESIZE); 1463 ASSERT((bcnt % XB_BSIZE) == 0); 1464 dma_addr = vreq->v_dmac.dmac_laddress; 1465 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1466 ASSERT((blk_off & XB_BMASK) == 0); 1467 fsect = blk_off >> XB_BSHIFT; 1468 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1469 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1470 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1471 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1472 seg, vreq->v_dmac.dmac_size, blk_off)); 1473 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1474 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1475 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1476 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1477 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1478 "\n", seg, fsect, lsect, gr, dma_addr)); 1479 1480 blkno += (bcnt >> XB_BSHIFT); 1481 seg++; 1482 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1483 if (--ndmacs) { 1484 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1485 continue; 1486 } 1487 1488 vreq->v_status = VREQ_DMAWIN_DONE; 1489 vreq->v_blkno = blkno; 1490 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1491 /* last win */ 1492 rval = XF_COMP; 1493 else 1494 rval = XF_PARTIAL; 1495 break; 1496 } 1497 ddi_put8(acchdl, &rreq->nr_segments, seg); 1498 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1499 rreq->id)); 1500 1501 return (rval); 1502 } 1503 1504 #define XDF_QSEC 50000 /* .005 second */ 1505 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1506 1507 static int 1508 xdf_drain_io(xdf_t *vdp) 1509 { 1510 int pollc, rval; 1511 xendev_ring_t *xbr; 1512 1513 if (xdfdebug & SUSRES_DBG) 1514 xen_printf("xdf_drain_io: start\n"); 1515 1516 mutex_enter(&vdp->xdf_dev_lk); 1517 1518 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1519 goto out; 1520 1521 rval = 0; 1522 xbr = vdp->xdf_xb_ring; 1523 ASSERT(xbr != NULL); 1524 1525 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1526 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1527 mutex_exit(&vdp->xdf_dev_lk); 1528 (void) xdf_intr((caddr_t)vdp); 1529 mutex_enter(&vdp->xdf_dev_lk); 1530 } 1531 if (!xvdi_ring_has_incomp_request(xbr)) 1532 goto out; 1533 1534 #ifndef XPV_HVM_DRIVER 1535 (void) HYPERVISOR_yield(); 1536 #endif /* XPV_HVM_DRIVER */ 1537 /* 1538 * file-backed devices can be slow 1539 */ 1540 drv_usecwait(XDF_QSEC << pollc); 1541 } 1542 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1543 rval = EIO; 1544 out: 1545 mutex_exit(&vdp->xdf_dev_lk); 1546 if (xdfdebug & SUSRES_DBG) 1547 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1548 return (rval); 1549 } 1550 1551 /* ARGSUSED5 */ 1552 int 1553 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1554 diskaddr_t start, size_t reqlen, void *tg_cookie) 1555 { 1556 xdf_t *vdp; 1557 struct buf *bp; 1558 int err = 0; 1559 1560 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1561 if (vdp == NULL) 1562 return (ENXIO); 1563 1564 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 1565 return (EINVAL); 1566 1567 bp = getrbuf(KM_SLEEP); 1568 if (cmd == TG_READ) 1569 bp->b_flags = B_BUSY | B_READ; 1570 else 1571 bp->b_flags = B_BUSY | B_WRITE; 1572 bp->b_un.b_addr = bufp; 1573 bp->b_bcount = reqlen; 1574 bp->b_blkno = start; 1575 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1576 1577 mutex_enter(&vdp->xdf_dev_lk); 1578 if (vdp->xdf_xdev_iostat != NULL) 1579 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1580 if (vdp->xdf_f_act == NULL) { 
1581 vdp->xdf_f_act = vdp->xdf_l_act = bp; 1582 } else { 1583 vdp->xdf_l_act->av_forw = bp; 1584 vdp->xdf_l_act = bp; 1585 } 1586 mutex_exit(&vdp->xdf_dev_lk); 1587 xdf_iostart(vdp); 1588 err = biowait(bp); 1589 1590 ASSERT(bp->b_flags & B_DONE); 1591 1592 freerbuf(bp); 1593 return (err); 1594 } 1595 1596 /* 1597 * synthetic geometry 1598 */ 1599 #define XDF_NSECTS 256 1600 #define XDF_NHEADS 16 1601 1602 static void 1603 xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1604 { 1605 xdf_t *vdp; 1606 uint_t ncyl; 1607 1608 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1609 1610 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1611 1612 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1613 geomp->g_acyl = 0; 1614 geomp->g_nhead = XDF_NHEADS; 1615 geomp->g_secsize = XB_BSIZE; 1616 geomp->g_nsect = XDF_NSECTS; 1617 geomp->g_intrlv = 0; 1618 geomp->g_rpm = 7200; 1619 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1620 } 1621 1622 static int 1623 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1624 { 1625 xdf_t *vdp; 1626 1627 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1628 1629 if (vdp == NULL) 1630 return (ENXIO); 1631 1632 mutex_enter(&vdp->xdf_dev_lk); 1633 *capp = vdp->xdf_pgeom.g_capacity; 1634 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1635 mutex_exit(&vdp->xdf_dev_lk); 1636 return (0); 1637 } 1638 1639 static int 1640 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1641 { 1642 xdf_t *vdp; 1643 1644 if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL) 1645 return (ENXIO); 1646 *geomp = vdp->xdf_pgeom; 1647 return (0); 1648 } 1649 1650 /* 1651 * No real HBA, no geometry available from it 1652 */ 1653 /*ARGSUSED*/ 1654 static int 1655 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1656 { 1657 return (EINVAL); 1658 } 1659 1660 static int 1661 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1662 { 1663 xdf_t *vdp; 1664 1665 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1666 return (ENXIO); 1667 1668 if (XD_IS_RO(vdp)) 1669 tgattributep->media_is_writable = 0; 1670 else 1671 tgattributep->media_is_writable = 1; 1672 return (0); 1673 } 1674 1675 /* ARGSUSED3 */ 1676 int 1677 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1678 { 1679 switch (cmd) { 1680 case TG_GETPHYGEOM: 1681 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1682 case TG_GETVIRTGEOM: 1683 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1684 case TG_GETCAPACITY: 1685 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1686 case TG_GETBLOCKSIZE: 1687 *(uint32_t *)arg = XB_BSIZE; 1688 return (0); 1689 case TG_GETATTR: 1690 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1691 default: 1692 return (ENOTTY); 1693 } 1694 } 1695 1696 /* 1697 * Kick-off connect process 1698 * Status should be XD_UNKNOWN or XD_CLOSED 1699 * On success, status will be changed to XD_INIT 1700 * On error, status won't be changed 1701 */ 1702 static int 1703 xdf_start_connect(xdf_t *vdp) 1704 { 1705 char *xsnode; 1706 grant_ref_t gref; 1707 xenbus_transaction_t xbt; 1708 int rv; 1709 dev_info_t *dip = vdp->xdf_dip; 1710 1711 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1712 goto errout; 1713 1714 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1715 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1716 ddi_get_name_addr(dip)); 1717 goto errout; 1718 } 1719 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1720 #ifdef XPV_HVM_DRIVER 1721 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1722 
#else /* !XPV_HVM_DRIVER */ 1723 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1724 DDI_SUCCESS) { 1725 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1726 "failed to add intr handler", ddi_get_name_addr(dip)); 1727 goto errout1; 1728 } 1729 #endif /* !XPV_HVM_DRIVER */ 1730 1731 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1732 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1733 DDI_SUCCESS) { 1734 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1735 ddi_get_name_addr(dip)); 1736 goto errout2; 1737 } 1738 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1739 1740 /* 1741 * Write into xenstore the info needed by backend 1742 */ 1743 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1744 cmn_err(CE_WARN, "xdf@%s: " 1745 "failed to get xenstore node path", 1746 ddi_get_name_addr(dip)); 1747 goto fail_trans; 1748 } 1749 trans_retry: 1750 if (xenbus_transaction_start(&xbt)) { 1751 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1752 ddi_get_name_addr(dip)); 1753 xvdi_fatal_error(dip, EIO, "transaction start"); 1754 goto fail_trans; 1755 } 1756 1757 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1758 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1759 ddi_get_name_addr(dip)); 1760 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1761 goto abort_trans; 1762 } 1763 1764 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1765 vdp->xdf_evtchn)) { 1766 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1767 ddi_get_name_addr(dip)); 1768 xvdi_fatal_error(dip, rv, "writing event-channel"); 1769 goto abort_trans; 1770 } 1771 1772 /* 1773 * "protocol" is written by the domain builder in the case of PV 1774 * domains. However, it is not written for HVM domains, so let's 1775 * write it here. 
1776 */ 1777 if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", 1778 XEN_IO_PROTO_ABI_NATIVE)) { 1779 cmn_err(CE_WARN, "xdf@%s: failed to write protocol", 1780 ddi_get_name_addr(dip)); 1781 xvdi_fatal_error(dip, rv, "writing protocol"); 1782 goto abort_trans; 1783 } 1784 1785 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1786 cmn_err(CE_WARN, "xdf@%s: " 1787 "failed to switch state to XenbusStateInitialised", 1788 ddi_get_name_addr(dip)); 1789 xvdi_fatal_error(dip, rv, "writing state"); 1790 goto abort_trans; 1791 } 1792 1793 /* kick-off connect process */ 1794 if (rv = xenbus_transaction_end(xbt, 0)) { 1795 if (rv == EAGAIN) 1796 goto trans_retry; 1797 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1798 ddi_get_name_addr(dip)); 1799 xvdi_fatal_error(dip, rv, "completing transaction"); 1800 goto fail_trans; 1801 } 1802 1803 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1804 mutex_enter(&vdp->xdf_dev_lk); 1805 vdp->xdf_status = XD_INIT; 1806 mutex_exit(&vdp->xdf_dev_lk); 1807 1808 return (DDI_SUCCESS); 1809 1810 abort_trans: 1811 (void) xenbus_transaction_end(xbt, 1); 1812 fail_trans: 1813 xvdi_free_ring(vdp->xdf_xb_ring); 1814 errout2: 1815 #ifdef XPV_HVM_DRIVER 1816 ec_unbind_evtchn(vdp->xdf_evtchn); 1817 #else /* !XPV_HVM_DRIVER */ 1818 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1819 #endif /* !XPV_HVM_DRIVER */ 1820 errout1: 1821 xvdi_free_evtchn(dip); 1822 errout: 1823 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1824 ddi_get_name_addr(dip)); 1825 return (DDI_FAILURE); 1826 } 1827 1828 /* 1829 * Kick-off disconnect process 1830 * Status won't be changed 1831 */ 1832 static int 1833 xdf_start_disconnect(xdf_t *vdp) 1834 { 1835 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1836 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1837 ddi_get_name_addr(vdp->xdf_dip)); 1838 return (DDI_FAILURE); 1839 } 1840 1841 return (DDI_SUCCESS); 1842 } 1843 1844 int 1845 xdf_get_flush_block(xdf_t *vdp) 1846 { 1847 /* 1848 * Get a DEV_BSIZE aligned bufer 1849 */ 1850 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1851 vdp->xdf_cache_flush_block = 1852 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1853 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1854 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1855 return (DDI_FAILURE); 1856 return (DDI_SUCCESS); 1857 } 1858 1859 /* 1860 * Finish other initialization after we've connected to backend 1861 * Status should be XD_INIT before calling this routine 1862 * On success, status should be changed to XD_READY 1863 * On error, status should stay XD_INIT 1864 */ 1865 static int 1866 xdf_post_connect(xdf_t *vdp) 1867 { 1868 int rv; 1869 uint_t len; 1870 char *type; 1871 char *barrier; 1872 dev_info_t *devi = vdp->xdf_dip; 1873 1874 /* 1875 * Determine if feature barrier is supported by backend 1876 */ 1877 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1878 "feature-barrier", (void **)&barrier, &len) == 0) { 1879 vdp->xdf_feature_barrier = 1; 1880 kmem_free(barrier, len); 1881 } else { 1882 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1883 ddi_get_name_addr(vdp->xdf_dip)); 1884 vdp->xdf_feature_barrier = 0; 1885 } 1886 1887 /* probe backend */ 1888 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1889 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1890 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1891 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1892 "cannot read backend info", ddi_get_name_addr(devi)); 1893 
xvdi_fatal_error(devi, rv, "reading backend info"); 1894 return (DDI_FAILURE); 1895 } 1896 1897 /* 1898 * Make sure that the device we're connecting isn't smaller than 1899 * the old connected device. 1900 */ 1901 if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) { 1902 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1903 "backend disk device shrank", ddi_get_name_addr(devi)); 1904 /* XXX: call xvdi_fatal_error() here? */ 1905 xvdi_fatal_error(devi, rv, "reading backend info"); 1906 return (DDI_FAILURE); 1907 } 1908 1909 /* 1910 * Only update the physical geometry to reflect the new device 1911 * size if this is the first time we're connecting to the backend 1912 * device. Once we assign a physical geometry to a device it stays 1913 * fixed until: 1914 * - we get detach and re-attached (at which point we 1915 * automatically assign a new physical geometry). 1916 * - someone calls TG_SETPHYGEOM to explicity set the 1917 * physical geometry. 1918 */ 1919 if (vdp->xdf_pgeom.g_capacity == 0) 1920 xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); 1921 1922 /* fix disk type */ 1923 if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", 1924 (void **)&type, &len) != 0) { 1925 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1926 "cannot read device-type", ddi_get_name_addr(devi)); 1927 xvdi_fatal_error(devi, rv, "reading device-type"); 1928 return (DDI_FAILURE); 1929 } 1930 if (strcmp(type, "cdrom") == 0) 1931 vdp->xdf_xdev_info |= VDISK_CDROM; 1932 kmem_free(type, len); 1933 1934 /* 1935 * We've created all the minor nodes via cmlb_attach() using default 1936 * value in xdf_attach() to make it possible to block in xdf_open(), 1937 * in case there's anyone (say, booting thread) ever trying to open 1938 * it before connected to backend. We will refresh all those minor 1939 * nodes w/ latest info we've got now when we are almost connected. 1940 * 1941 * Don't do this when xdf is already opened by someone (could happen 1942 * during resume), for that cmlb_attach() will invalid the label info 1943 * and confuse those who has already opened the node, which is bad. 1944 */ 1945 if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { 1946 /* re-init cmlb w/ latest info we got from backend */ 1947 if (cmlb_attach(devi, &xdf_lb_ops, 1948 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 1949 XD_IS_RM(vdp), 1, 1950 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 1951 #if defined(XPV_HVM_DRIVER) 1952 CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | 1953 CMLB_INTERNAL_MINOR_NODES, 1954 #else /* !XPV_HVM_DRIVER */ 1955 CMLB_FAKE_LABEL_ONE_PARTITION, 1956 #endif /* !XPV_HVM_DRIVER */ 1957 vdp->xdf_vd_lbl, NULL) != 0) { 1958 cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", 1959 ddi_get_name_addr(devi)); 1960 return (DDI_FAILURE); 1961 } 1962 } 1963 1964 /* mark vbd is ready for I/O */ 1965 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1966 mutex_enter(&vdp->xdf_dev_lk); 1967 vdp->xdf_status = XD_READY; 1968 mutex_exit(&vdp->xdf_dev_lk); 1969 /* 1970 * If backend has feature-barrier, see if it supports disk 1971 * cache flush op. 1972 */ 1973 vdp->xdf_flush_supported = 0; 1974 if (vdp->xdf_feature_barrier) { 1975 /* 1976 * Pretend we already know flush is supported so probe 1977 * will attempt the correct op. 
1978 */ 1979 vdp->xdf_flush_supported = 1; 1980 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1981 vdp->xdf_flush_supported = 1; 1982 } else { 1983 vdp->xdf_flush_supported = 0; 1984 /* 1985 * If the other end does not support the cache flush op 1986 * then we must use a barrier-write to force disk 1987 * cache flushing. Barrier writes require that a data 1988 * block actually be written. 1989 * Cache a block to barrier-write when we are 1990 * asked to perform a flush. 1991 * XXX - would it be better to just copy 1 block 1992 * (512 bytes) from whatever write we did last 1993 * and rewrite that block? 1994 */ 1995 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1996 return (DDI_FAILURE); 1997 } 1998 } 1999 2000 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 2001 (uint64_t)vdp->xdf_xdev_nblocks); 2002 2003 return (DDI_SUCCESS); 2004 } 2005 2006 /* 2007 * Finish other uninitialization after we've disconnected from backend 2008 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 2009 */ 2010 static void 2011 xdf_post_disconnect(xdf_t *vdp) 2012 { 2013 #ifdef XPV_HVM_DRIVER 2014 ec_unbind_evtchn(vdp->xdf_evtchn); 2015 #else /* !XPV_HVM_DRIVER */ 2016 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 2017 #endif /* !XPV_HVM_DRIVER */ 2018 xvdi_free_evtchn(vdp->xdf_dip); 2019 xvdi_free_ring(vdp->xdf_xb_ring); 2020 vdp->xdf_xb_ring = NULL; 2021 vdp->xdf_xb_ring_hdl = NULL; 2022 vdp->xdf_peer = (domid_t)-1; 2023 2024 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 2025 mutex_enter(&vdp->xdf_dev_lk); 2026 vdp->xdf_status = XD_CLOSED; 2027 mutex_exit(&vdp->xdf_dev_lk); 2028 } 2029 2030 /*ARGSUSED*/ 2031 static void 2032 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 2033 { 2034 XenbusState new_state = *(XenbusState *)impl_data; 2035 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2036 boolean_t unexpect_die = B_FALSE; 2037 int status; 2038 2039 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 2040 ddi_get_name_addr(dip), new_state)); 2041 2042 mutex_enter(&vdp->xdf_cb_lk); 2043 2044 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 2045 mutex_exit(&vdp->xdf_cb_lk); 2046 return; 2047 } 2048 2049 switch (new_state) { 2050 case XenbusStateInitialising: 2051 ASSERT(vdp->xdf_status == XD_CLOSED); 2052 /* 2053 * backend recovered from a previous failure, 2054 * kick-off connect process again 2055 */ 2056 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 2057 cmn_err(CE_WARN, "xdf@%s:" 2058 " failed to start reconnecting to backend", 2059 ddi_get_name_addr(dip)); 2060 } 2061 break; 2062 case XenbusStateConnected: 2063 ASSERT(vdp->xdf_status == XD_INIT); 2064 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 2065 /* finish final init after connect */ 2066 if (xdf_post_connect(vdp) != DDI_SUCCESS) 2067 (void) xdf_start_disconnect(vdp); 2068 break; 2069 case XenbusStateClosing: 2070 if (vdp->xdf_status == XD_READY) { 2071 mutex_enter(&vdp->xdf_dev_lk); 2072 if (xdf_isopen(vdp, -1)) { 2073 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 2074 "still in use", ddi_get_name_addr(dip)); 2075 mutex_exit(&vdp->xdf_dev_lk); 2076 break; 2077 } else { 2078 vdp->xdf_status = XD_CLOSING; 2079 } 2080 mutex_exit(&vdp->xdf_dev_lk); 2081 } 2082 (void) xdf_start_disconnect(vdp); 2083 break; 2084 case XenbusStateClosed: 2085 /* first check if BE closed unexpectedly */ 2086 mutex_enter(&vdp->xdf_dev_lk); 2087 if (xdf_isopen(vdp, -1)) { 2088 unexpect_die = B_TRUE; 2089 unexpectedie(vdp); 2090 cmn_err(CE_WARN, 
"xdf@%s: backend closed, " 2091 "reconnecting...", ddi_get_name_addr(dip)); 2092 } 2093 mutex_exit(&vdp->xdf_dev_lk); 2094 2095 if (vdp->xdf_status == XD_READY) { 2096 mutex_enter(&vdp->xdf_dev_lk); 2097 vdp->xdf_status = XD_CLOSING; 2098 mutex_exit(&vdp->xdf_dev_lk); 2099 2100 #ifdef DOMU_BACKEND 2101 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 2102 #endif 2103 2104 xdf_post_disconnect(vdp); 2105 (void) xvdi_switch_state(dip, XBT_NULL, 2106 XenbusStateClosed); 2107 } else if ((vdp->xdf_status == XD_INIT) || 2108 (vdp->xdf_status == XD_CLOSING)) { 2109 xdf_post_disconnect(vdp); 2110 } else { 2111 mutex_enter(&vdp->xdf_dev_lk); 2112 vdp->xdf_status = XD_CLOSED; 2113 mutex_exit(&vdp->xdf_dev_lk); 2114 } 2115 } 2116 2117 /* notify anybody waiting for oe state change */ 2118 mutex_enter(&vdp->xdf_dev_lk); 2119 cv_broadcast(&vdp->xdf_dev_cv); 2120 mutex_exit(&vdp->xdf_dev_lk); 2121 2122 status = vdp->xdf_status; 2123 mutex_exit(&vdp->xdf_cb_lk); 2124 2125 if (status == XD_READY) { 2126 xdf_iostart(vdp); 2127 } else if ((status == XD_CLOSED) && !unexpect_die) { 2128 /* interface is closed successfully, remove all minor nodes */ 2129 cmlb_detach(vdp->xdf_vd_lbl, NULL); 2130 cmlb_free_handle(&vdp->xdf_vd_lbl); 2131 } 2132 } 2133 2134 /* check if partition is open, -1 - check all partitions on the disk */ 2135 static boolean_t 2136 xdf_isopen(xdf_t *vdp, int partition) 2137 { 2138 int i; 2139 ulong_t parbit; 2140 boolean_t rval = B_FALSE; 2141 2142 ASSERT((partition == -1) || 2143 ((partition >= 0) || (partition < XDF_PEXT))); 2144 2145 if (partition == -1) 2146 parbit = (ulong_t)-1; 2147 else 2148 parbit = 1 << partition; 2149 2150 for (i = 0; i < OTYPCNT; i++) { 2151 if (vdp->xdf_vd_open[i] & parbit) 2152 rval = B_TRUE; 2153 } 2154 2155 return (rval); 2156 } 2157 2158 /* 2159 * Xdf_check_state_transition will check the XenbusState change to see 2160 * if the change is a valid transition or not. 
2161 * The new state is written by backend domain, or by running xenstore-write 2162 * to change it manually in dom0 2163 */ 2164 static int 2165 xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) 2166 { 2167 int status; 2168 int stcheck; 2169 #define STOK 0 /* need further process */ 2170 #define STNOP 1 /* no action need taking */ 2171 #define STBUG 2 /* unexpected state change, could be a bug */ 2172 2173 status = vdp->xdf_status; 2174 stcheck = STOK; 2175 2176 switch (status) { 2177 case XD_UNKNOWN: 2178 if ((oestate == XenbusStateUnknown) || 2179 (oestate == XenbusStateConnected)) 2180 stcheck = STBUG; 2181 else if ((oestate == XenbusStateInitialising) || 2182 (oestate == XenbusStateInitWait) || 2183 (oestate == XenbusStateInitialised)) 2184 stcheck = STNOP; 2185 break; 2186 case XD_INIT: 2187 if (oestate == XenbusStateUnknown) 2188 stcheck = STBUG; 2189 else if ((oestate == XenbusStateInitialising) || 2190 (oestate == XenbusStateInitWait) || 2191 (oestate == XenbusStateInitialised)) 2192 stcheck = STNOP; 2193 break; 2194 case XD_READY: 2195 if ((oestate == XenbusStateUnknown) || 2196 (oestate == XenbusStateInitialising) || 2197 (oestate == XenbusStateInitWait) || 2198 (oestate == XenbusStateInitialised)) 2199 stcheck = STBUG; 2200 else if (oestate == XenbusStateConnected) 2201 stcheck = STNOP; 2202 break; 2203 case XD_CLOSING: 2204 if ((oestate == XenbusStateUnknown) || 2205 (oestate == XenbusStateInitialising) || 2206 (oestate == XenbusStateInitWait) || 2207 (oestate == XenbusStateInitialised) || 2208 (oestate == XenbusStateConnected)) 2209 stcheck = STBUG; 2210 else if (oestate == XenbusStateClosing) 2211 stcheck = STNOP; 2212 break; 2213 case XD_CLOSED: 2214 if ((oestate == XenbusStateUnknown) || 2215 (oestate == XenbusStateConnected)) 2216 stcheck = STBUG; 2217 else if ((oestate == XenbusStateInitWait) || 2218 (oestate == XenbusStateInitialised) || 2219 (oestate == XenbusStateClosing) || 2220 (oestate == XenbusStateClosed)) 2221 stcheck = STNOP; 2222 break; 2223 case XD_SUSPEND: 2224 default: 2225 stcheck = STBUG; 2226 } 2227 2228 if (stcheck == STOK) 2229 return (DDI_SUCCESS); 2230 2231 if (stcheck == STBUG) 2232 cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " 2233 "state change to %d!, when status is %d", 2234 ddi_get_name_addr(vdp->xdf_dip), oestate, status); 2235 2236 return (DDI_FAILURE); 2237 } 2238 2239 static int 2240 xdf_connect(xdf_t *vdp, boolean_t wait) 2241 { 2242 ASSERT(mutex_owned(&vdp->xdf_dev_lk)); 2243 while (vdp->xdf_status != XD_READY) { 2244 if (!wait || (vdp->xdf_status > XD_READY)) 2245 break; 2246 2247 if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0) 2248 break; 2249 } 2250 2251 return (vdp->xdf_status); 2252 } 2253 2254 /* 2255 * callback func when DMA/GTE resources is available 2256 * 2257 * Note: we only register one callback function to grant table subsystem 2258 * since we only have one 'struct gnttab_free_callback' in xdf_t. 
2259 */ 2260 static int 2261 xdf_dmacallback(caddr_t arg) 2262 { 2263 xdf_t *vdp = (xdf_t *)arg; 2264 ASSERT(vdp != NULL); 2265 2266 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 2267 ddi_get_name_addr(vdp->xdf_dip))); 2268 2269 ddi_trigger_softintr(vdp->xdf_softintr_id); 2270 return (DDI_DMA_CALLBACK_DONE); 2271 } 2272 2273 static uint_t 2274 xdf_iorestart(caddr_t arg) 2275 { 2276 xdf_t *vdp = (xdf_t *)arg; 2277 2278 ASSERT(vdp != NULL); 2279 2280 mutex_enter(&vdp->xdf_dev_lk); 2281 ASSERT(ISDMACBON(vdp)); 2282 SETDMACBOFF(vdp); 2283 mutex_exit(&vdp->xdf_dev_lk); 2284 2285 xdf_iostart(vdp); 2286 2287 return (DDI_INTR_CLAIMED); 2288 } 2289 2290 static void 2291 xdf_timeout_handler(void *arg) 2292 { 2293 xdf_t *vdp = arg; 2294 2295 mutex_enter(&vdp->xdf_dev_lk); 2296 vdp->xdf_timeout_id = 0; 2297 mutex_exit(&vdp->xdf_dev_lk); 2298 2299 /* new timeout thread could be re-scheduled */ 2300 xdf_iostart(vdp); 2301 } 2302 2303 /* 2304 * Alloc a vreq for this bp 2305 * bp->av_back contains the pointer to the vreq upon return 2306 */ 2307 static v_req_t * 2308 vreq_get(xdf_t *vdp, buf_t *bp) 2309 { 2310 v_req_t *vreq = NULL; 2311 2312 ASSERT(BP2VREQ(bp) == NULL); 2313 2314 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 2315 if (vreq == NULL) { 2316 if (vdp->xdf_timeout_id == 0) 2317 /* restart I/O after one second */ 2318 vdp->xdf_timeout_id = 2319 timeout(xdf_timeout_handler, vdp, hz); 2320 return (NULL); 2321 } 2322 bzero(vreq, sizeof (v_req_t)); 2323 2324 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 2325 bp->av_back = (buf_t *)vreq; 2326 vreq->v_buf = bp; 2327 vreq->v_status = VREQ_INIT; 2328 /* init of other fields in vreq is up to the caller */ 2329 2330 return (vreq); 2331 } 2332 2333 static void 2334 vreq_free(xdf_t *vdp, v_req_t *vreq) 2335 { 2336 buf_t *bp = vreq->v_buf; 2337 2338 list_remove(&vdp->xdf_vreq_act, (void *)vreq); 2339 2340 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 2341 goto done; 2342 2343 switch (vreq->v_status) { 2344 case VREQ_DMAWIN_DONE: 2345 case VREQ_GS_ALLOCED: 2346 case VREQ_DMABUF_BOUND: 2347 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 2348 /*FALLTHRU*/ 2349 case VREQ_DMAMEM_ALLOCED: 2350 if (!ALIGNED_XFER(bp)) { 2351 ASSERT(vreq->v_abuf != NULL); 2352 if (!IS_ERROR(bp) && IS_READ(bp)) 2353 bcopy(vreq->v_abuf, bp->b_un.b_addr, 2354 bp->b_bcount); 2355 ddi_dma_mem_free(&vreq->v_align); 2356 } 2357 /*FALLTHRU*/ 2358 case VREQ_MEMDMAHDL_ALLOCED: 2359 if (!ALIGNED_XFER(bp)) 2360 ddi_dma_free_handle(&vreq->v_memdmahdl); 2361 /*FALLTHRU*/ 2362 case VREQ_DMAHDL_ALLOCED: 2363 ddi_dma_free_handle(&vreq->v_dmahdl); 2364 break; 2365 default: 2366 break; 2367 } 2368 done: 2369 vreq->v_buf->av_back = NULL; 2370 kmem_cache_free(xdf_vreq_cache, vreq); 2371 } 2372 2373 /* 2374 * Initalize the DMA and grant table resources for the buf 2375 */ 2376 static int 2377 vreq_setup(xdf_t *vdp, v_req_t *vreq) 2378 { 2379 int rc; 2380 ddi_dma_attr_t dmaattr; 2381 uint_t ndcs, ndws; 2382 ddi_dma_handle_t dh; 2383 ddi_dma_handle_t mdh; 2384 ddi_dma_cookie_t dc; 2385 ddi_acc_handle_t abh; 2386 caddr_t aba; 2387 ge_slot_t *gs; 2388 size_t bufsz; 2389 off_t off; 2390 size_t sz; 2391 buf_t *bp = vreq->v_buf; 2392 int dma_flags = (IS_READ(bp) ? 
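/*
 * Added overview of the setup path below: vreq_setup() walks a buf through
 * a small state machine recorded in v_status:
 *
 *	VREQ_INIT -> VREQ_INIT_DONE -> VREQ_DMAHDL_ALLOCED ->
 *	VREQ_MEMDMAHDL_ALLOCED -> VREQ_DMAMEM_ALLOCED ->
 *	VREQ_DMABUF_BOUND -> VREQ_GS_ALLOCED (-> VREQ_DMAWIN_DONE -> ...)
 *
 * (the two bounce-buffer states are only reached for transfers that are
 * not already 512-byte aligned).  Every allocation is attempted without
 * sleeping, registering a callback or the one-second timeout instead, so
 * a failure at any step leaves v_status at the last completed state; when
 * resources become available, xdf_iostart() is kicked from the DMA/grant
 * table callbacks or from the timeout handler and vreq_setup() resumes
 * from that state.  vreq_free() above unwinds whatever was allocated,
 * keyed off the same v_status value.
 */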
/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, (
				    "xdf@%s: get ge_slot failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_gs = gs;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->vreq = vreq;
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		bp->b_private = NULL;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work round CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
				    "handle alloc failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);

			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, (
				    "xdf@%s: DMA mem allocation failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get ge_slot; the callback is set upon failure from
		 * gs_get(), if not set previously
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_dmaw++;
		rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs);
		ASSERT(rc == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

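/*
 * Added summary of the grant-table slot helpers below: a ge_slot_t tracks
 * the grant-table references used by one ring request.  gs_get() reserves
 * BLKIF_MAX_SEGMENTS_PER_REQUEST references up front (arming the grant
 * table free callback, or the one-second timeout, if the reservation or
 * the slot allocation fails), gs_grant() hands one of the reserved
 * references to the backend for a given mfn (writable for reads,
 * read-only for writes), and gs_free() ends the foreign access on the
 * used references and releases the rest of the reservation.
 */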
static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* try to alloc GTEs needed in this slot, first */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init gs_slot */
	list_insert_head(&vdp->xdf_gs_act, (void *)gs);
	gs->oeid = vdp->xdf_peer;
	gs->isread = isread;
	gs->ghead = gh;
	gs->ngrefs = 0;

	return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
	int i;
	grant_ref_t *gp = gs->ge;
	int ngrefs = gs->ngrefs;
	boolean_t isread = gs->isread;

	list_remove(&vdp->xdf_gs_act, (void *)gs);

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < ngrefs; i++, gp++)
		gnttab_end_foreign_access(*gp, !isread, 0);
	gnttab_free_grant_references(gs->ghead);

	kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

	ASSERT(gr != -1);
	ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->ge[gs->ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

	return (gr);
}

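/*
 * Added summary of the recovery path below: unexpectedie() recovers from
 * a backend that went away while I/O was outstanding.  It consumes any
 * responses already on the ring, releases every active grant-table slot,
 * and moves the bufs of all active requests back to the head of the
 * waiting list (resetting b_resid and the kstat run queue) so that they
 * can be reissued once we reconnect.
 */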
static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in ring that have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * move the bufs back to the active list in order;
	 * vreq_busy is updated in vreq_free()
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of the list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		if (vdp->xdf_xdev_iostat != NULL)
			kstat_runq_back_to_waitq(
			    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}

void
xdf_kstat_delete(dev_info_t *dip)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t *kstat;

	/*
	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of our kstat.  xdf_iostat_lk is used
	 * to protect the allocation and freeing of the actual kstat.
	 * xdf_dev_lk can't be used for this purpose because kstat
	 * readers use it to access the contents of the kstat and
	 * hence it can't be held when calling kstat_delete().
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return;
	}

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);

	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}

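/*
 * Create the "disk" I/O kstat for an xdf instance.  A minimal usage
 * sketch (the module name and instance below are illustrative only):
 *
 *	if (xdf_kstat_create(dip, "xdf", ddi_get_instance(dip)) != 0)
 *		... the kstat already exists or could not be created ...
 *
 * The kstat uses xdf_dev_lk as its ks_lock, so kstat readers and the I/O
 * path that updates the statistics synchronize on the same lock.
 */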
int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* See comment about locking in xdf_kstat_delete(). */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat != NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	if ((vdp->xdf_xdev_iostat = kstat_create(
	    ks_module, ks_instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
	kstat_install(vdp->xdf_xdev_iostat);
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_iostat_lk);

	return (0);
}

#if defined(XPV_HVM_DRIVER)

typedef struct xdf_hvm_entry {
	list_node_t	xdf_he_list;
	char		*xdf_he_path;
	dev_info_t	*xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;

static xdf_hvm_entry_t *
i_xdf_hvm_find(char *path, dev_info_t *dip)
{
	xdf_hvm_entry_t *i;

	ASSERT((path != NULL) || (dip != NULL));
	ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

	i = list_head(&xdf_hvm_list);
	while (i != NULL) {
		if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		if ((dip != NULL) && (i->xdf_he_dip != dip)) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		break;
	}
	return (i);
}

dev_info_t *
xdf_hvm_hold(char *path)
{
	xdf_hvm_entry_t *i;
	dev_info_t *dip;

	mutex_enter(&xdf_hvm_list_lock);
	i = i_xdf_hvm_find(path, NULL);
	if (i == NULL) {
		mutex_exit(&xdf_hvm_list_lock);
		return (NULL);
	}
	ndi_hold_devi(dip = i->xdf_he_dip);
	mutex_exit(&xdf_hvm_list_lock);
	return (dip);
}

static void
xdf_hvm_add(dev_info_t *dip)
{
	xdf_hvm_entry_t *i;
	char *path;

	/* figure out the path for the dip */
	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	i = kmem_alloc(sizeof (*i), KM_SLEEP);
	i->xdf_he_dip = dip;
	i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

	mutex_enter(&xdf_hvm_list_lock);
	ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
	ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
	list_insert_head(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
	xdf_hvm_entry_t *i;

	mutex_enter(&xdf_hvm_list_lock);
	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
	list_remove(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
	kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
	    offsetof(xdf_hvm_entry_t, xdf_he_list));
	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
	ASSERT(list_head(&xdf_hvm_list) == NULL);
	list_destroy(&xdf_hvm_list);
	mutex_destroy(&xdf_hvm_list_lock);
}

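/*
 * The non-static routines below (together with xdf_hvm_hold() above) form
 * the external interface of this HVM support code.  A caller is expected
 * to do something like the following (illustrative sketch only):
 *
 *	if ((dip = xdf_hvm_hold(path)) == NULL)
 *		return (ENXIO);
 *	if (xdf_hvm_connect(dip) != 0)
 *		... backend connection failed ...
 *
 * xdf_hvm_connect() blocks until the backend connection reaches XD_READY
 * (returning -1 if the connection fails instead), and xdf_hvm_setpgeom()
 * lets the caller override the synthetic physical geometry before the
 * disk label is re-validated.
 */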
int
xdf_hvm_connect(dev_info_t *dip)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	int rv;

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_connect(vdp, B_TRUE);
	mutex_exit(&vdp->xdf_dev_lk);
	return ((rv == XD_READY) ? 0 : -1);
}

int
xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* sanity check the requested physical geometry */
	mutex_enter(&vdp->xdf_dev_lk);
	if ((geomp->g_secsize != XB_BSIZE) ||
	    (geomp->g_capacity == 0)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	/*
	 * If we've already connected to the backend device then make sure
	 * we're not defining a physical geometry larger than our backend
	 * device.
	 */
	if ((vdp->xdf_xdev_nblocks != 0) &&
	    (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	vdp->xdf_pgeom = *geomp;
	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	return (0);
}

#endif /* XPV_HVM_DRIVER */