1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * xdf.c - Xen Virtual Block Device Driver 29 * TODO: 30 * - support alternate block size (currently only DEV_BSIZE supported) 31 * - revalidate geometry for removable devices 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/conf.h> 39 #include <sys/cmlb.h> 40 #include <sys/dkio.h> 41 #include <sys/promif.h> 42 #include <sys/sysmacros.h> 43 #include <sys/kstat.h> 44 #include <sys/mach_mmu.h> 45 #ifdef XPV_HVM_DRIVER 46 #include <sys/xpv_support.h> 47 #include <sys/sunndi.h> 48 #endif /* XPV_HVM_DRIVER */ 49 #include <public/io/xenbus.h> 50 #include <xen/sys/xenbus_impl.h> 51 #include <xen/sys/xendev.h> 52 #include <sys/gnttab.h> 53 #include <sys/scsi/generic/inquiry.h> 54 #include <xen/io/blkif_impl.h> 55 #include <io/xdf.h> 56 57 #define FLUSH_DISKCACHE 0x1 58 #define WRITE_BARRIER 0x2 59 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 60 #define USE_WRITE_BARRIER(vdp) \ 61 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 62 #define USE_FLUSH_DISKCACHE(vdp) \ 63 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 64 #define IS_WRITE_BARRIER(vdp, bp) \ 65 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 66 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 67 #define IS_FLUSH_DISKCACHE(bp) \ 68 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 69 70 static void *vbd_ss; 71 static kmem_cache_t *xdf_vreq_cache; 72 static kmem_cache_t *xdf_gs_cache; 73 static int xdf_maxphys = XB_MAXPHYS; 74 int xdfdebug = 0; 75 extern int do_polled_io; 76 diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 77 int xdf_barrier_flush_disable = 0; 78 79 /* 80 * dev_ops and cb_ops entrypoints 81 */ 82 static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 83 static int xdf_attach(dev_info_t *, ddi_attach_cmd_t); 84 static int xdf_detach(dev_info_t *, ddi_detach_cmd_t); 85 static int xdf_reset(dev_info_t *, ddi_reset_cmd_t); 86 static int xdf_open(dev_t *, int, int, cred_t *); 87 static int xdf_close(dev_t, int, int, struct cred *); 88 static int xdf_strategy(struct buf *); 89 static int xdf_read(dev_t, struct uio *, cred_t *); 90 static int xdf_aread(dev_t, struct aio_req *, cred_t *); 91 static int xdf_write(dev_t, struct uio *, cred_t *); 92 static int xdf_awrite(dev_t, struct aio_req *, cred_t *); 93 static int xdf_dump(dev_t, caddr_t, daddr_t, int); 94 static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 95 static uint_t xdf_intr(caddr_t); 96 static int xdf_prop_op(dev_t, dev_info_t *, 
ddi_prop_op_t, int, char *, 97 caddr_t, int *); 98 99 /* 100 * misc private functions 101 */ 102 static int xdf_suspend(dev_info_t *); 103 static int xdf_resume(dev_info_t *); 104 static int xdf_start_connect(xdf_t *); 105 static int xdf_start_disconnect(xdf_t *); 106 static int xdf_post_connect(xdf_t *); 107 static void xdf_post_disconnect(xdf_t *); 108 static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); 109 static void xdf_iostart(xdf_t *); 110 static void xdf_iofini(xdf_t *, uint64_t, int); 111 static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *); 112 static int xdf_drain_io(xdf_t *); 113 static boolean_t xdf_isopen(xdf_t *, int); 114 static int xdf_check_state_transition(xdf_t *, XenbusState); 115 static int xdf_connect(xdf_t *, boolean_t); 116 static int xdf_dmacallback(caddr_t); 117 static void xdf_timeout_handler(void *); 118 static uint_t xdf_iorestart(caddr_t); 119 static v_req_t *vreq_get(xdf_t *, buf_t *); 120 static void vreq_free(xdf_t *, v_req_t *); 121 static int vreq_setup(xdf_t *, v_req_t *); 122 static ge_slot_t *gs_get(xdf_t *, int); 123 static void gs_free(xdf_t *, ge_slot_t *); 124 static grant_ref_t gs_grant(ge_slot_t *, mfn_t); 125 static void unexpectedie(xdf_t *); 126 static void xdfmin(struct buf *); 127 static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *); 128 extern int xdf_kstat_create(dev_info_t *, char *, int); 129 extern void xdf_kstat_delete(dev_info_t *); 130 131 #if defined(XPV_HVM_DRIVER) 132 static void xdf_hvm_add(dev_info_t *); 133 static void xdf_hvm_rm(dev_info_t *); 134 static void xdf_hvm_init(void); 135 static void xdf_hvm_fini(void); 136 #endif /* XPV_HVM_DRIVER */ 137 138 static struct cb_ops xdf_cbops = { 139 xdf_open, 140 xdf_close, 141 xdf_strategy, 142 nodev, 143 xdf_dump, 144 xdf_read, 145 xdf_write, 146 xdf_ioctl, 147 nodev, 148 nodev, 149 nodev, 150 nochpoll, 151 xdf_prop_op, 152 NULL, 153 D_MP | D_NEW | D_64BIT, 154 CB_REV, 155 xdf_aread, 156 xdf_awrite 157 }; 158 159 struct dev_ops xdf_devops = { 160 DEVO_REV, /* devo_rev */ 161 0, /* devo_refcnt */ 162 xdf_getinfo, /* devo_getinfo */ 163 nulldev, /* devo_identify */ 164 nulldev, /* devo_probe */ 165 xdf_attach, /* devo_attach */ 166 xdf_detach, /* devo_detach */ 167 xdf_reset, /* devo_reset */ 168 &xdf_cbops, /* devo_cb_ops */ 169 (struct bus_ops *)NULL /* devo_bus_ops */ 170 }; 171 172 static struct modldrv modldrv = { 173 &mod_driverops, /* Type of module. 
This one is a driver */ 174 "virtual block driver %I%", /* short description */ 175 &xdf_devops /* driver specific ops */ 176 }; 177 178 static struct modlinkage xdf_modlinkage = { 179 MODREV_1, (void *)&modldrv, NULL 180 }; 181 182 /* 183 * I/O buffer DMA attributes 184 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 185 */ 186 static ddi_dma_attr_t xb_dma_attr = { 187 DMA_ATTR_V0, 188 (uint64_t)0, /* lowest address */ 189 (uint64_t)0xffffffffffffffff, /* highest usable address */ 190 (uint64_t)0xffffff, /* DMA counter limit max */ 191 (uint64_t)XB_BSIZE, /* alignment in bytes */ 192 XB_BSIZE - 1, /* bitmap of burst sizes */ 193 XB_BSIZE, /* min transfer */ 194 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 195 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 196 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 197 XB_BSIZE, /* granularity */ 198 0, /* flags (reserved) */ 199 }; 200 201 static ddi_device_acc_attr_t xc_acc_attr = { 202 DDI_DEVICE_ATTR_V0, 203 DDI_NEVERSWAP_ACC, 204 DDI_STRICTORDER_ACC 205 }; 206 207 /* callbacks from commmon label */ 208 209 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 210 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 211 212 static cmlb_tg_ops_t xdf_lb_ops = { 213 TG_DK_OPS_VERSION_1, 214 xdf_lb_rdwr, 215 xdf_lb_getinfo 216 }; 217 218 int 219 _init(void) 220 { 221 int rc; 222 223 if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0) 224 return (rc); 225 226 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 227 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 228 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 229 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 230 231 #if defined(XPV_HVM_DRIVER) 232 xdf_hvm_init(); 233 #endif /* XPV_HVM_DRIVER */ 234 235 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 236 #if defined(XPV_HVM_DRIVER) 237 xdf_hvm_fini(); 238 #endif /* XPV_HVM_DRIVER */ 239 kmem_cache_destroy(xdf_vreq_cache); 240 kmem_cache_destroy(xdf_gs_cache); 241 ddi_soft_state_fini(&vbd_ss); 242 return (rc); 243 } 244 245 return (rc); 246 } 247 248 int 249 _fini(void) 250 { 251 252 int err; 253 if ((err = mod_remove(&xdf_modlinkage)) != 0) 254 return (err); 255 256 #if defined(XPV_HVM_DRIVER) 257 xdf_hvm_fini(); 258 #endif /* XPV_HVM_DRIVER */ 259 260 kmem_cache_destroy(xdf_vreq_cache); 261 kmem_cache_destroy(xdf_gs_cache); 262 ddi_soft_state_fini(&vbd_ss); 263 264 return (0); 265 } 266 267 int 268 _info(struct modinfo *modinfop) 269 { 270 return (mod_info(&xdf_modlinkage, modinfop)); 271 } 272 273 /*ARGSUSED*/ 274 static int 275 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 276 { 277 int instance; 278 xdf_t *vbdp; 279 280 instance = XDF_INST(getminor((dev_t)arg)); 281 282 switch (cmd) { 283 case DDI_INFO_DEVT2DEVINFO: 284 if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { 285 *rp = NULL; 286 return (DDI_FAILURE); 287 } 288 *rp = vbdp->xdf_dip; 289 return (DDI_SUCCESS); 290 291 case DDI_INFO_DEVT2INSTANCE: 292 *rp = (void *)(uintptr_t)instance; 293 return (DDI_SUCCESS); 294 295 default: 296 return (DDI_FAILURE); 297 } 298 } 299 300 static int 301 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 302 char *name, caddr_t valuep, int *lengthp) 303 { 304 int instance = ddi_get_instance(dip); 305 xdf_t *vdp; 306 diskaddr_t p_blkcnt; 307 308 /* 309 * xdf dynamic properties are device specific and size oriented. 
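 * ("Size" here is the block count of the addressed slice, obtained from
 * cmlb_partinfo() once xdf_connect() reports the device ready.)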
310 * Requests issued under conditions where size is valid are passed 311 * to ddi_prop_op_nblocks with the size information, otherwise the 312 * request is passed to ddi_prop_op. 313 */ 314 vdp = ddi_get_soft_state(vbd_ss, instance); 315 316 if ((dev == DDI_DEV_T_ANY) || (vdp == NULL)) 317 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 318 name, valuep, lengthp)); 319 320 /* do cv_wait until connected or failed */ 321 mutex_enter(&vdp->xdf_dev_lk); 322 if (xdf_connect(vdp, B_TRUE) != XD_READY) { 323 mutex_exit(&vdp->xdf_dev_lk); 324 goto out; 325 } 326 mutex_exit(&vdp->xdf_dev_lk); 327 328 if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt, 329 NULL, NULL, NULL, NULL) == 0) 330 return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 331 name, valuep, lengthp, (uint64_t)p_blkcnt)); 332 333 out: 334 return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep, 335 lengthp)); 336 } 337 338 static int 339 xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 340 { 341 xdf_t *vdp; 342 ddi_iblock_cookie_t softibc; 343 int instance; 344 345 xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, 346 "xdfdebug", 0); 347 348 switch (cmd) { 349 case DDI_ATTACH: 350 break; 351 352 case DDI_RESUME: 353 return (xdf_resume(devi)); 354 355 default: 356 return (DDI_FAILURE); 357 } 358 359 instance = ddi_get_instance(devi); 360 if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) 361 return (DDI_FAILURE); 362 363 DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); 364 vdp = ddi_get_soft_state(vbd_ss, instance); 365 ddi_set_driver_private(devi, vdp); 366 vdp->xdf_dip = devi; 367 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 368 369 if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) { 370 cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", 371 ddi_get_name_addr(devi)); 372 goto errout0; 373 } 374 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); 375 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); 376 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, 377 (void *)vdp->xdf_ibc); 378 379 if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) 380 != DDI_SUCCESS) { 381 cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", 382 ddi_get_name_addr(devi)); 383 goto errout0; 384 } 385 if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 386 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 387 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 388 ddi_get_name_addr(devi)); 389 goto errout0; 390 } 391 392 #if !defined(XPV_HVM_DRIVER) 393 /* create kstat for iostat(1M) */ 394 if (xdf_kstat_create(devi, "xdf", instance) != 0) { 395 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 396 ddi_get_name_addr(devi)); 397 goto errout0; 398 } 399 #endif /* !XPV_HVM_DRIVER */ 400 401 /* driver handles kernel-issued IOCTLs */ 402 if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, 403 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 404 cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", 405 ddi_get_name_addr(devi)); 406 goto errout0; 407 } 408 409 /* 410 * Initialize the physical geometry stucture. Note that currently 411 * we don't know the size of the backend device so the number 412 * of blocks on the device will be initialized to zero. Once 413 * we connect to the backend device we'll update the physical 414 * geometry to reflect the real size of the device. 
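 * xdf_synthetic_pgeom() fabricates a fixed CHS geometry (XDF_NHEADS heads,
 * XDF_NSECTS sectors per track) from xdf_xdev_nblocks.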
415 */ 416 xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); 417 418 /* 419 * create default device minor nodes: non-removable disk 420 * we will adjust minor nodes after we are connected w/ backend 421 */ 422 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 423 if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, 424 DDI_NT_BLOCK_XVMD, 425 #if defined(XPV_HVM_DRIVER) 426 CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | 427 CMLB_INTERNAL_MINOR_NODES, 428 #else /* !XPV_HVM_DRIVER */ 429 CMLB_FAKE_LABEL_ONE_PARTITION, 430 #endif /* !XPV_HVM_DRIVER */ 431 vdp->xdf_vd_lbl, NULL) != 0) { 432 cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", 433 ddi_get_name_addr(devi)); 434 goto errout0; 435 } 436 437 /* 438 * We ship with cache-enabled disks 439 */ 440 vdp->xdf_wce = 1; 441 442 mutex_enter(&vdp->xdf_cb_lk); 443 444 /* Watch backend XenbusState change */ 445 if (xvdi_add_event_handler(devi, XS_OE_STATE, 446 xdf_oe_change) != DDI_SUCCESS) { 447 mutex_exit(&vdp->xdf_cb_lk); 448 goto errout0; 449 } 450 451 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 452 cmn_err(CE_WARN, "xdf@%s: start connection failed", 453 ddi_get_name_addr(devi)); 454 (void) xdf_start_disconnect(vdp); 455 mutex_exit(&vdp->xdf_cb_lk); 456 goto errout1; 457 } 458 459 mutex_exit(&vdp->xdf_cb_lk); 460 461 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 462 offsetof(v_req_t, v_link)); 463 list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), 464 offsetof(ge_slot_t, link)); 465 466 #if defined(XPV_HVM_DRIVER) 467 xdf_hvm_add(devi); 468 469 (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1); 470 471 /* 472 * Report our version to dom0. 473 */ 474 if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", 475 HVMPV_XDF_VERS)) 476 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 477 #endif /* XPV_HVM_DRIVER */ 478 479 ddi_report_dev(devi); 480 481 DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); 482 483 return (DDI_SUCCESS); 484 485 errout1: 486 xvdi_remove_event_handler(devi, XS_OE_STATE); 487 errout0: 488 if (vdp->xdf_vd_lbl != NULL) { 489 cmlb_detach(vdp->xdf_vd_lbl, NULL); 490 cmlb_free_handle(&vdp->xdf_vd_lbl); 491 vdp->xdf_vd_lbl = NULL; 492 } 493 #if !defined(XPV_HVM_DRIVER) 494 xdf_kstat_delete(devi); 495 #endif /* !XPV_HVM_DRIVER */ 496 if (vdp->xdf_softintr_id != NULL) 497 ddi_remove_softintr(vdp->xdf_softintr_id); 498 if (vdp->xdf_ibc != NULL) { 499 mutex_destroy(&vdp->xdf_cb_lk); 500 mutex_destroy(&vdp->xdf_dev_lk); 501 } 502 cv_destroy(&vdp->xdf_dev_cv); 503 ddi_soft_state_free(vbd_ss, instance); 504 ddi_set_driver_private(devi, NULL); 505 ddi_prop_remove_all(devi); 506 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); 507 return (DDI_FAILURE); 508 } 509 510 static int 511 xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 512 { 513 xdf_t *vdp; 514 int instance; 515 516 switch (cmd) { 517 518 case DDI_PM_SUSPEND: 519 break; 520 521 case DDI_SUSPEND: 522 return (xdf_suspend(devi)); 523 524 case DDI_DETACH: 525 break; 526 527 default: 528 return (DDI_FAILURE); 529 } 530 531 instance = ddi_get_instance(devi); 532 DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); 533 vdp = ddi_get_soft_state(vbd_ss, instance); 534 535 if (vdp == NULL) 536 return (DDI_FAILURE); 537 538 mutex_enter(&vdp->xdf_dev_lk); 539 if (xdf_isopen(vdp, -1)) { 540 mutex_exit(&vdp->xdf_dev_lk); 541 return (DDI_FAILURE); 542 } 543 544 if (vdp->xdf_status != XD_CLOSED) { 545 mutex_exit(&vdp->xdf_dev_lk); 546 return (DDI_FAILURE); 547 } 548 549 #if defined(XPV_HVM_DRIVER) 550 xdf_hvm_rm(devi); 551 #endif /* XPV_HVM_DRIVER */ 552 553 
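	/* device is closed and unopened, so no DMA-resource callback can still be pending */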
ASSERT(!ISDMACBON(vdp)); 554 mutex_exit(&vdp->xdf_dev_lk); 555 556 if (vdp->xdf_timeout_id != 0) 557 (void) untimeout(vdp->xdf_timeout_id); 558 559 xvdi_remove_event_handler(devi, XS_OE_STATE); 560 561 /* we'll support backend running in domU later */ 562 #ifdef DOMU_BACKEND 563 (void) xvdi_post_event(devi, XEN_HP_REMOVE); 564 #endif 565 566 list_destroy(&vdp->xdf_vreq_act); 567 list_destroy(&vdp->xdf_gs_act); 568 ddi_prop_remove_all(devi); 569 xdf_kstat_delete(devi); 570 ddi_remove_softintr(vdp->xdf_softintr_id); 571 ddi_set_driver_private(devi, NULL); 572 cv_destroy(&vdp->xdf_dev_cv); 573 mutex_destroy(&vdp->xdf_cb_lk); 574 mutex_destroy(&vdp->xdf_dev_lk); 575 if (vdp->xdf_cache_flush_block != NULL) 576 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 577 ddi_soft_state_free(vbd_ss, instance); 578 return (DDI_SUCCESS); 579 } 580 581 static int 582 xdf_suspend(dev_info_t *devi) 583 { 584 xdf_t *vdp; 585 int instance; 586 enum xdf_state st; 587 588 instance = ddi_get_instance(devi); 589 590 if (xdfdebug & SUSRES_DBG) 591 xen_printf("xdf_suspend: xdf#%d\n", instance); 592 593 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 594 return (DDI_FAILURE); 595 596 xvdi_suspend(devi); 597 598 mutex_enter(&vdp->xdf_cb_lk); 599 mutex_enter(&vdp->xdf_dev_lk); 600 st = vdp->xdf_status; 601 /* change status to stop further I/O requests */ 602 if (st == XD_READY) 603 vdp->xdf_status = XD_SUSPEND; 604 mutex_exit(&vdp->xdf_dev_lk); 605 mutex_exit(&vdp->xdf_cb_lk); 606 607 /* make sure no more I/O responses left in the ring buffer */ 608 if ((st == XD_INIT) || (st == XD_READY)) { 609 #ifdef XPV_HVM_DRIVER 610 ec_unbind_evtchn(vdp->xdf_evtchn); 611 xvdi_free_evtchn(devi); 612 #else /* !XPV_HVM_DRIVER */ 613 (void) ddi_remove_intr(devi, 0, NULL); 614 #endif /* !XPV_HVM_DRIVER */ 615 (void) xdf_drain_io(vdp); 616 /* 617 * no need to teardown the ring buffer here 618 * it will be simply re-init'ed during resume when 619 * we call xvdi_alloc_ring 620 */ 621 } 622 623 if (xdfdebug & SUSRES_DBG) 624 xen_printf("xdf_suspend: SUCCESS\n"); 625 626 return (DDI_SUCCESS); 627 } 628 629 /*ARGSUSED*/ 630 static int 631 xdf_resume(dev_info_t *devi) 632 { 633 xdf_t *vdp; 634 int instance; 635 636 instance = ddi_get_instance(devi); 637 if (xdfdebug & SUSRES_DBG) 638 xen_printf("xdf_resume: xdf%d\n", instance); 639 640 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 641 return (DDI_FAILURE); 642 643 mutex_enter(&vdp->xdf_cb_lk); 644 645 if (xvdi_resume(devi) != DDI_SUCCESS) { 646 mutex_exit(&vdp->xdf_cb_lk); 647 return (DDI_FAILURE); 648 } 649 650 mutex_enter(&vdp->xdf_dev_lk); 651 ASSERT(vdp->xdf_status != XD_READY); 652 vdp->xdf_status = XD_UNKNOWN; 653 mutex_exit(&vdp->xdf_dev_lk); 654 655 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 656 mutex_exit(&vdp->xdf_cb_lk); 657 return (DDI_FAILURE); 658 } 659 660 mutex_exit(&vdp->xdf_cb_lk); 661 662 if (xdfdebug & SUSRES_DBG) 663 xen_printf("xdf_resume: done\n"); 664 return (DDI_SUCCESS); 665 } 666 667 /*ARGSUSED*/ 668 static int 669 xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 670 { 671 xdf_t *vdp; 672 int instance; 673 674 instance = ddi_get_instance(devi); 675 DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); 676 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 677 return (DDI_FAILURE); 678 679 /* 680 * wait for any outstanding I/O to complete 681 */ 682 (void) xdf_drain_io(vdp); 683 684 DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); 685 return (DDI_SUCCESS); 686 } 687 688 static int 689 xdf_open(dev_t *devp, int flag, int otyp, cred_t 
*credp) 690 { 691 minor_t minor; 692 xdf_t *vdp; 693 int part; 694 ulong_t parbit; 695 diskaddr_t p_blkct = 0; 696 boolean_t firstopen; 697 boolean_t nodelay; 698 699 minor = getminor(*devp); 700 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 701 return (ENXIO); 702 703 nodelay = (flag & (FNDELAY | FNONBLOCK)); 704 705 DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); 706 707 /* do cv_wait until connected or failed */ 708 mutex_enter(&vdp->xdf_dev_lk); 709 if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { 710 mutex_exit(&vdp->xdf_dev_lk); 711 return (ENXIO); 712 } 713 714 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 715 mutex_exit(&vdp->xdf_dev_lk); 716 return (EROFS); 717 } 718 719 part = XDF_PART(minor); 720 parbit = 1 << part; 721 if ((vdp->xdf_vd_exclopen & parbit) || 722 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 723 mutex_exit(&vdp->xdf_dev_lk); 724 return (EBUSY); 725 } 726 727 /* are we the first one to open this node? */ 728 firstopen = !xdf_isopen(vdp, -1); 729 730 if (otyp == OTYP_LYR) 731 vdp->xdf_vd_lyropen[part]++; 732 733 vdp->xdf_vd_open[otyp] |= parbit; 734 735 if (flag & FEXCL) 736 vdp->xdf_vd_exclopen |= parbit; 737 738 mutex_exit(&vdp->xdf_dev_lk); 739 740 /* force a re-validation */ 741 if (firstopen) 742 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 743 744 /* 745 * check size 746 * ignore CD/DVD which contains a zero-sized s0 747 */ 748 if (!nodelay && !XD_IS_CD(vdp) && 749 ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 750 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { 751 (void) xdf_close(*devp, flag, otyp, credp); 752 return (ENXIO); 753 } 754 755 return (0); 756 } 757 758 /*ARGSUSED*/ 759 static int 760 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 761 { 762 minor_t minor; 763 xdf_t *vdp; 764 int part; 765 ulong_t parbit; 766 767 minor = getminor(dev); 768 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 769 return (ENXIO); 770 771 mutex_enter(&vdp->xdf_dev_lk); 772 part = XDF_PART(minor); 773 if (!xdf_isopen(vdp, part)) { 774 mutex_exit(&vdp->xdf_dev_lk); 775 return (ENXIO); 776 } 777 parbit = 1 << part; 778 779 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 780 if (otyp == OTYP_LYR) { 781 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 782 if (--vdp->xdf_vd_lyropen[part] == 0) 783 vdp->xdf_vd_open[otyp] &= ~parbit; 784 } else { 785 vdp->xdf_vd_open[otyp] &= ~parbit; 786 } 787 vdp->xdf_vd_exclopen &= ~parbit; 788 789 mutex_exit(&vdp->xdf_dev_lk); 790 return (0); 791 } 792 793 static int 794 xdf_strategy(struct buf *bp) 795 { 796 xdf_t *vdp; 797 minor_t minor; 798 diskaddr_t p_blkct, p_blkst; 799 ulong_t nblks; 800 int part; 801 802 minor = getminor(bp->b_edev); 803 part = XDF_PART(minor); 804 805 vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)); 806 if ((vdp == NULL) || !xdf_isopen(vdp, part)) { 807 bioerror(bp, ENXIO); 808 bp->b_resid = bp->b_bcount; 809 biodone(bp); 810 return (0); 811 } 812 813 /* Check for writes to a read only device */ 814 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 815 bioerror(bp, EROFS); 816 bp->b_resid = bp->b_bcount; 817 biodone(bp); 818 return (0); 819 } 820 821 /* Check if this I/O is accessing a partition or the entire disk */ 822 if ((long)bp->b_private == XB_SLICE_NONE) { 823 /* This I/O is using an absolute offset */ 824 p_blkct = vdp->xdf_xdev_nblocks; 825 p_blkst = 0; 826 } else { 827 /* This I/O is using a partition relative offset */ 828 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 829 &p_blkst, NULL, NULL, NULL)) { 830 bioerror(bp, ENXIO); 831 bp->b_resid = bp->b_bcount; 
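		/* slice lookup failed: complete the buf as ENXIO without ever queueing it */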
832 biodone(bp); 833 return (0); 834 } 835 } 836 837 /* check for a starting block beyond the disk or partition limit */ 838 if (bp->b_blkno > p_blkct) { 839 DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, 840 (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 841 bioerror(bp, EINVAL); 842 bp->b_resid = bp->b_bcount; 843 biodone(bp); 844 return (0); 845 } 846 847 /* Legacy: don't set error flag at this case */ 848 if (bp->b_blkno == p_blkct) { 849 bp->b_resid = bp->b_bcount; 850 biodone(bp); 851 return (0); 852 } 853 854 /* Adjust for partial transfer */ 855 nblks = bp->b_bcount >> XB_BSHIFT; 856 if ((bp->b_blkno + nblks) > p_blkct) { 857 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 858 bp->b_bcount -= bp->b_resid; 859 } 860 861 DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", 862 (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 863 864 /* Fix up the buf struct */ 865 bp->b_flags |= B_BUSY; 866 bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */ 867 bp->b_private = (void *)(uintptr_t)p_blkst; 868 869 mutex_enter(&vdp->xdf_dev_lk); 870 if (vdp->xdf_xdev_iostat != NULL) 871 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 872 if (vdp->xdf_f_act == NULL) { 873 vdp->xdf_f_act = vdp->xdf_l_act = bp; 874 } else { 875 vdp->xdf_l_act->av_forw = bp; 876 vdp->xdf_l_act = bp; 877 } 878 mutex_exit(&vdp->xdf_dev_lk); 879 880 xdf_iostart(vdp); 881 if (do_polled_io) 882 (void) xdf_drain_io(vdp); 883 return (0); 884 } 885 886 /*ARGSUSED*/ 887 static int 888 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 889 { 890 891 xdf_t *vdp; 892 minor_t minor; 893 diskaddr_t p_blkcnt; 894 int part; 895 896 minor = getminor(dev); 897 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 898 return (ENXIO); 899 900 DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", 901 (int64_t)uiop->uio_offset)); 902 903 part = XDF_PART(minor); 904 if (!xdf_isopen(vdp, part)) 905 return (ENXIO); 906 907 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 908 NULL, NULL, NULL, NULL)) 909 return (ENXIO); 910 911 if (U_INVAL(uiop)) 912 return (EINVAL); 913 914 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 915 } 916 917 /*ARGSUSED*/ 918 static int 919 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 920 { 921 xdf_t *vdp; 922 minor_t minor; 923 diskaddr_t p_blkcnt; 924 int part; 925 926 minor = getminor(dev); 927 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 928 return (ENXIO); 929 930 DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", 931 (int64_t)uiop->uio_offset)); 932 933 part = XDF_PART(minor); 934 if (!xdf_isopen(vdp, part)) 935 return (ENXIO); 936 937 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 938 NULL, NULL, NULL, NULL)) 939 return (ENXIO); 940 941 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 942 return (ENOSPC); 943 944 if (U_INVAL(uiop)) 945 return (EINVAL); 946 947 return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); 948 } 949 950 /*ARGSUSED*/ 951 static int 952 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 953 { 954 xdf_t *vdp; 955 minor_t minor; 956 struct uio *uiop = aiop->aio_uio; 957 diskaddr_t p_blkcnt; 958 int part; 959 960 minor = getminor(dev); 961 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 962 return (ENXIO); 963 964 part = XDF_PART(minor); 965 if (!xdf_isopen(vdp, part)) 966 return (ENXIO); 967 968 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 969 NULL, NULL, NULL, NULL)) 970 return (ENXIO); 971 972 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 973 return 
(ENOSPC); 974 975 if (U_INVAL(uiop)) 976 return (EINVAL); 977 978 return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); 979 } 980 981 /*ARGSUSED*/ 982 static int 983 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 984 { 985 xdf_t *vdp; 986 minor_t minor; 987 struct uio *uiop = aiop->aio_uio; 988 diskaddr_t p_blkcnt; 989 int part; 990 991 minor = getminor(dev); 992 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 993 return (ENXIO); 994 995 part = XDF_PART(minor); 996 if (!xdf_isopen(vdp, part)) 997 return (ENXIO); 998 999 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 1000 NULL, NULL, NULL, NULL)) 1001 return (ENXIO); 1002 1003 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 1004 return (ENOSPC); 1005 1006 if (U_INVAL(uiop)) 1007 return (EINVAL); 1008 1009 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); 1010 } 1011 1012 static int 1013 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1014 { 1015 struct buf dumpbuf, *dbp; 1016 xdf_t *vdp; 1017 minor_t minor; 1018 int err = 0; 1019 int part; 1020 diskaddr_t p_blkcnt, p_blkst; 1021 1022 minor = getminor(dev); 1023 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 1024 return (ENXIO); 1025 1026 DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", 1027 addr, blkno, nblk)); 1028 1029 part = XDF_PART(minor); 1030 if (!xdf_isopen(vdp, part)) 1031 return (ENXIO); 1032 1033 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 1034 NULL, NULL, NULL)) 1035 return (ENXIO); 1036 1037 if ((blkno + nblk) > p_blkcnt) { 1038 cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, 1039 blkno + nblk, (uint64_t)p_blkcnt); 1040 return (EINVAL); 1041 } 1042 1043 dbp = &dumpbuf; 1044 bioinit(dbp); 1045 dbp->b_flags = B_BUSY; 1046 dbp->b_un.b_addr = addr; 1047 dbp->b_bcount = nblk << DEV_BSHIFT; 1048 dbp->b_blkno = blkno; 1049 dbp->b_edev = dev; 1050 dbp->b_private = (void *)(uintptr_t)p_blkst; 1051 1052 mutex_enter(&vdp->xdf_dev_lk); 1053 if (vdp->xdf_xdev_iostat != NULL) 1054 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1055 if (vdp->xdf_f_act == NULL) { 1056 vdp->xdf_f_act = vdp->xdf_l_act = dbp; 1057 } else { 1058 vdp->xdf_l_act->av_forw = dbp; 1059 vdp->xdf_l_act = dbp; 1060 } 1061 dbp->av_forw = NULL; 1062 dbp->av_back = NULL; 1063 mutex_exit(&vdp->xdf_dev_lk); 1064 xdf_iostart(vdp); 1065 err = xdf_drain_io(vdp); 1066 biofini(dbp); 1067 return (err); 1068 } 1069 1070 /*ARGSUSED*/ 1071 static int 1072 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1073 int *rvalp) 1074 { 1075 int instance; 1076 xdf_t *vdp; 1077 minor_t minor; 1078 int part; 1079 1080 minor = getminor(dev); 1081 instance = XDF_INST(minor); 1082 1083 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 1084 return (ENXIO); 1085 1086 DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", 1087 instance, cmd, cmd)); 1088 1089 part = XDF_PART(minor); 1090 if (!xdf_isopen(vdp, part)) 1091 return (ENXIO); 1092 1093 switch (cmd) { 1094 case DKIOCGMEDIAINFO: { 1095 struct dk_minfo media_info; 1096 1097 media_info.dki_lbsize = DEV_BSIZE; 1098 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 1099 media_info.dki_media_type = DK_FIXED_DISK; 1100 1101 if (ddi_copyout(&media_info, (void *)arg, 1102 sizeof (struct dk_minfo), mode)) { 1103 return (EFAULT); 1104 } else { 1105 return (0); 1106 } 1107 } 1108 1109 case DKIOCINFO: { 1110 struct dk_cinfo info; 1111 1112 /* controller information */ 1113 if (XD_IS_CD(vdp)) 1114 info.dki_ctype = DKC_CDROM; 1115 
else 1116 info.dki_ctype = DKC_VBD; 1117 1118 info.dki_cnum = 0; 1119 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 1120 1121 /* unit information */ 1122 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 1123 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 1124 info.dki_flags = DKI_FMTVOL; 1125 info.dki_partition = part; 1126 info.dki_maxtransfer = maxphys / DEV_BSIZE; 1127 info.dki_addr = 0; 1128 info.dki_space = 0; 1129 info.dki_prio = 0; 1130 info.dki_vec = 0; 1131 1132 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 1133 return (EFAULT); 1134 else 1135 return (0); 1136 } 1137 1138 case DKIOCSTATE: { 1139 enum dkio_state dkstate = DKIO_INSERTED; 1140 if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), 1141 mode) != 0) 1142 return (EFAULT); 1143 return (0); 1144 } 1145 1146 /* 1147 * is media removable? 1148 */ 1149 case DKIOCREMOVABLE: { 1150 int i = XD_IS_RM(vdp) ? 1 : 0; 1151 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1152 return (EFAULT); 1153 return (0); 1154 } 1155 1156 case DKIOCG_PHYGEOM: 1157 case DKIOCG_VIRTGEOM: 1158 case DKIOCGGEOM: 1159 case DKIOCSGEOM: 1160 case DKIOCGAPART: 1161 case DKIOCSAPART: 1162 case DKIOCGVTOC: 1163 case DKIOCSVTOC: 1164 case DKIOCPARTINFO: 1165 case DKIOCGMBOOT: 1166 case DKIOCSMBOOT: 1167 case DKIOCGETEFI: 1168 case DKIOCSETEFI: 1169 case DKIOCPARTITION: { 1170 int rc; 1171 1172 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1173 rvalp, NULL); 1174 return (rc); 1175 } 1176 1177 case DKIOCGETWCE: 1178 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1179 sizeof (vdp->xdf_wce), mode)) 1180 return (EFAULT); 1181 return (0); 1182 case DKIOCSETWCE: 1183 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1184 sizeof (vdp->xdf_wce), mode)) 1185 return (EFAULT); 1186 return (0); 1187 case DKIOCFLUSHWRITECACHE: { 1188 int rc; 1189 struct dk_callback *dkc = (struct dk_callback *)arg; 1190 1191 if (vdp->xdf_flush_supported) { 1192 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1193 NULL, 0, 0, (void *)dev); 1194 } else if (vdp->xdf_feature_barrier && 1195 !xdf_barrier_flush_disable) { 1196 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1197 vdp->xdf_cache_flush_block, xdf_flush_block, 1198 DEV_BSIZE, (void *)dev); 1199 } else { 1200 return (ENOTTY); 1201 } 1202 if ((mode & FKIOCTL) && (dkc != NULL) && 1203 (dkc->dkc_callback != NULL)) { 1204 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1205 /* need to return 0 after calling callback */ 1206 rc = 0; 1207 } 1208 return (rc); 1209 } 1210 1211 default: 1212 return (ENOTTY); 1213 } 1214 } 1215 1216 /* 1217 * xdf interrupt handler 1218 */ 1219 static uint_t 1220 xdf_intr(caddr_t arg) 1221 { 1222 xdf_t *vdp = (xdf_t *)arg; 1223 xendev_ring_t *xbr; 1224 blkif_response_t *resp; 1225 int bioerr; 1226 uint64_t id; 1227 extern int do_polled_io; 1228 uint8_t op; 1229 uint16_t status; 1230 ddi_acc_handle_t acchdl; 1231 1232 mutex_enter(&vdp->xdf_dev_lk); 1233 1234 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1235 mutex_exit(&vdp->xdf_dev_lk); 1236 return (DDI_INTR_UNCLAIMED); 1237 } 1238 1239 acchdl = vdp->xdf_xb_ring_hdl; 1240 1241 /* 1242 * complete all requests which have a response 1243 */ 1244 while (resp = xvdi_ring_get_response(xbr)) { 1245 id = ddi_get64(acchdl, &resp->id); 1246 op = ddi_get8(acchdl, &resp->operation); 1247 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1248 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1249 op, id, status)); 1250 1251 /* 1252 * XXPV - close connection to the backend and restart 1253 */ 1254 if (status != BLKIF_RSP_OKAY) { 1255 
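			/* any response status other than BLKIF_RSP_OKAY is mapped to EIO on the buf */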
DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 1256 ddi_get_name_addr(vdp->xdf_dip), 1257 (op == BLKIF_OP_READ) ? "reading" : "writing")); 1258 bioerr = EIO; 1259 } else { 1260 bioerr = 0; 1261 } 1262 1263 xdf_iofini(vdp, id, bioerr); 1264 } 1265 1266 mutex_exit(&vdp->xdf_dev_lk); 1267 1268 if (!do_polled_io) 1269 xdf_iostart(vdp); 1270 1271 return (DDI_INTR_CLAIMED); 1272 } 1273 1274 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1275 1276 /* 1277 * Snarf new data if our flush block was re-written 1278 */ 1279 static void 1280 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1281 { 1282 int nblks; 1283 boolean_t mapin; 1284 1285 if (IS_WRITE_BARRIER(vdp, bp)) 1286 return; /* write was a flush write */ 1287 1288 mapin = B_FALSE; 1289 nblks = bp->b_bcount >> DEV_BSHIFT; 1290 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1291 xdf_fbrewrites++; 1292 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1293 mapin = B_TRUE; 1294 bp_mapin(bp); 1295 } 1296 bcopy(bp->b_un.b_addr + 1297 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1298 vdp->xdf_cache_flush_block, DEV_BSIZE); 1299 if (mapin) 1300 bp_mapout(bp); 1301 } 1302 } 1303 1304 static void 1305 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1306 { 1307 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1308 v_req_t *vreq = gs->vreq; 1309 buf_t *bp = vreq->v_buf; 1310 1311 gs_free(vdp, gs); 1312 if (bioerr) 1313 bioerror(bp, bioerr); 1314 vreq->v_nslots--; 1315 if (vreq->v_nslots != 0) 1316 return; 1317 1318 XDF_UPDATE_IO_STAT(vdp, bp); 1319 if (vdp->xdf_xdev_iostat != NULL) 1320 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1321 1322 if (IS_ERROR(bp)) 1323 bp->b_resid = bp->b_bcount; 1324 1325 vreq_free(vdp, vreq); 1326 biodone(bp); 1327 } 1328 1329 /* 1330 * return value of xdf_prepare_rreq() 1331 * used in xdf_iostart() 1332 */ 1333 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1334 #define XF_COMP 1 /* no more I/O left in buf */ 1335 1336 static void 1337 xdf_iostart(xdf_t *vdp) 1338 { 1339 xendev_ring_t *xbr; 1340 struct buf *bp; 1341 blkif_request_t *rreq; 1342 int retval; 1343 int rreqready = 0; 1344 1345 xbr = vdp->xdf_xb_ring; 1346 1347 /* 1348 * populate the ring request(s) 1349 * 1350 * loop until there is no buf to transfer or no free slot 1351 * available in I/O ring 1352 */ 1353 mutex_enter(&vdp->xdf_dev_lk); 1354 1355 for (;;) { 1356 if (vdp->xdf_status != XD_READY) 1357 break; 1358 1359 /* active buf queue empty? 
*/ 1360 if ((bp = vdp->xdf_f_act) == NULL) 1361 break; 1362 1363 /* try to grab a vreq for this bp */ 1364 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1365 break; 1366 /* alloc DMA/GTE resources */ 1367 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1368 break; 1369 1370 /* get next blkif_request in the ring */ 1371 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1372 break; 1373 bzero(rreq, sizeof (blkif_request_t)); 1374 1375 /* populate blkif_request with this buf */ 1376 rreqready++; 1377 retval = xdf_prepare_rreq(vdp, bp, rreq); 1378 if (retval == XF_COMP) { 1379 /* finish this bp, switch to next one */ 1380 if (vdp->xdf_xdev_iostat != NULL) 1381 kstat_waitq_to_runq( 1382 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1383 vdp->xdf_f_act = bp->av_forw; 1384 bp->av_forw = NULL; 1385 } 1386 } 1387 1388 /* 1389 * Send the request(s) to the backend 1390 */ 1391 if (rreqready) { 1392 if (xvdi_ring_push_request(xbr)) { 1393 DPRINTF(IO_DBG, ("xdf_iostart: " 1394 "sent request(s) to backend\n")); 1395 xvdi_notify_oe(vdp->xdf_dip); 1396 } 1397 } 1398 1399 mutex_exit(&vdp->xdf_dev_lk); 1400 } 1401 1402 /* 1403 * populate a single blkif_request_t w/ a buf 1404 */ 1405 static int 1406 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1407 { 1408 int rval; 1409 grant_ref_t gr; 1410 uint8_t fsect, lsect; 1411 size_t bcnt; 1412 paddr_t dma_addr; 1413 off_t blk_off; 1414 dev_info_t *dip = vdp->xdf_dip; 1415 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1416 v_req_t *vreq = BP2VREQ(bp); 1417 uint64_t blkno = vreq->v_blkno; 1418 uint_t ndmacs = vreq->v_ndmacs; 1419 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1420 int seg = 0; 1421 int isread = IS_READ(bp); 1422 1423 if (isread) 1424 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1425 else { 1426 switch (vreq->v_flush_diskcache) { 1427 case FLUSH_DISKCACHE: 1428 ddi_put8(acchdl, &rreq->operation, 1429 BLKIF_OP_FLUSH_DISKCACHE); 1430 ddi_put16(acchdl, &rreq->handle, vdev); 1431 ddi_put64(acchdl, &rreq->id, 1432 (uint64_t)(uintptr_t)(vreq->v_gs)); 1433 ddi_put8(acchdl, &rreq->nr_segments, 0); 1434 return (XF_COMP); 1435 case WRITE_BARRIER: 1436 ddi_put8(acchdl, &rreq->operation, 1437 BLKIF_OP_WRITE_BARRIER); 1438 break; 1439 default: 1440 if (!vdp->xdf_wce) 1441 ddi_put8(acchdl, &rreq->operation, 1442 BLKIF_OP_WRITE_BARRIER); 1443 else 1444 ddi_put8(acchdl, &rreq->operation, 1445 BLKIF_OP_WRITE); 1446 break; 1447 } 1448 } 1449 1450 ddi_put16(acchdl, &rreq->handle, vdev); 1451 ddi_put64(acchdl, &rreq->sector_number, blkno); 1452 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1453 1454 /* 1455 * loop until all segments are populated or no more dma cookie in buf 1456 */ 1457 for (;;) { 1458 /* 1459 * Each segment of a blkif request can transfer up to 1460 * one 4K page of data. 
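		 * Each iteration below turns one DMA cookie into a ring
		 * segment: a grant reference for the underlying page plus
		 * first_sect/last_sect offsets within that page.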
1461 */ 1462 bcnt = vreq->v_dmac.dmac_size; 1463 ASSERT(bcnt <= PAGESIZE); 1464 ASSERT((bcnt % XB_BSIZE) == 0); 1465 dma_addr = vreq->v_dmac.dmac_laddress; 1466 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1467 ASSERT((blk_off & XB_BMASK) == 0); 1468 fsect = blk_off >> XB_BSHIFT; 1469 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1470 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1471 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1472 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1473 seg, vreq->v_dmac.dmac_size, blk_off)); 1474 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1475 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1476 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1477 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1478 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1479 "\n", seg, fsect, lsect, gr, dma_addr)); 1480 1481 blkno += (bcnt >> XB_BSHIFT); 1482 seg++; 1483 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1484 if (--ndmacs) { 1485 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1486 continue; 1487 } 1488 1489 vreq->v_status = VREQ_DMAWIN_DONE; 1490 vreq->v_blkno = blkno; 1491 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1492 /* last win */ 1493 rval = XF_COMP; 1494 else 1495 rval = XF_PARTIAL; 1496 break; 1497 } 1498 ddi_put8(acchdl, &rreq->nr_segments, seg); 1499 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1500 rreq->id)); 1501 1502 return (rval); 1503 } 1504 1505 #define XDF_QSEC 50000 /* .005 second */ 1506 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1507 1508 static int 1509 xdf_drain_io(xdf_t *vdp) 1510 { 1511 int pollc, rval; 1512 xendev_ring_t *xbr; 1513 1514 if (xdfdebug & SUSRES_DBG) 1515 xen_printf("xdf_drain_io: start\n"); 1516 1517 mutex_enter(&vdp->xdf_dev_lk); 1518 1519 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1520 goto out; 1521 1522 rval = 0; 1523 xbr = vdp->xdf_xb_ring; 1524 ASSERT(xbr != NULL); 1525 1526 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1527 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1528 mutex_exit(&vdp->xdf_dev_lk); 1529 (void) xdf_intr((caddr_t)vdp); 1530 mutex_enter(&vdp->xdf_dev_lk); 1531 } 1532 if (!xvdi_ring_has_incomp_request(xbr)) 1533 goto out; 1534 1535 #ifndef XPV_HVM_DRIVER 1536 (void) HYPERVISOR_yield(); 1537 #endif /* XPV_HVM_DRIVER */ 1538 /* 1539 * file-backed devices can be slow 1540 */ 1541 drv_usecwait(XDF_QSEC << pollc); 1542 } 1543 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1544 rval = EIO; 1545 out: 1546 mutex_exit(&vdp->xdf_dev_lk); 1547 if (xdfdebug & SUSRES_DBG) 1548 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1549 return (rval); 1550 } 1551 1552 /* ARGSUSED5 */ 1553 int 1554 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1555 diskaddr_t start, size_t reqlen, void *tg_cookie) 1556 { 1557 xdf_t *vdp; 1558 struct buf *bp; 1559 int err = 0; 1560 1561 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1562 if (vdp == NULL) 1563 return (ENXIO); 1564 1565 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 1566 return (EINVAL); 1567 1568 bp = getrbuf(KM_SLEEP); 1569 if (cmd == TG_READ) 1570 bp->b_flags = B_BUSY | B_READ; 1571 else 1572 bp->b_flags = B_BUSY | B_WRITE; 1573 bp->b_un.b_addr = bufp; 1574 bp->b_bcount = reqlen; 1575 bp->b_blkno = start; 1576 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1577 1578 mutex_enter(&vdp->xdf_dev_lk); 1579 if (vdp->xdf_xdev_iostat != NULL) 1580 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1581 if (vdp->xdf_f_act == NULL) { 
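		/* queue was empty: bp becomes both head (xdf_f_act) and tail (xdf_l_act) */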
1582 vdp->xdf_f_act = vdp->xdf_l_act = bp; 1583 } else { 1584 vdp->xdf_l_act->av_forw = bp; 1585 vdp->xdf_l_act = bp; 1586 } 1587 mutex_exit(&vdp->xdf_dev_lk); 1588 xdf_iostart(vdp); 1589 err = biowait(bp); 1590 1591 ASSERT(bp->b_flags & B_DONE); 1592 1593 freerbuf(bp); 1594 return (err); 1595 } 1596 1597 /* 1598 * synthetic geometry 1599 */ 1600 #define XDF_NSECTS 256 1601 #define XDF_NHEADS 16 1602 1603 static void 1604 xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1605 { 1606 xdf_t *vdp; 1607 uint_t ncyl; 1608 1609 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1610 1611 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1612 1613 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1614 geomp->g_acyl = 0; 1615 geomp->g_nhead = XDF_NHEADS; 1616 geomp->g_secsize = XB_BSIZE; 1617 geomp->g_nsect = XDF_NSECTS; 1618 geomp->g_intrlv = 0; 1619 geomp->g_rpm = 7200; 1620 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1621 } 1622 1623 static int 1624 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1625 { 1626 xdf_t *vdp; 1627 1628 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1629 1630 if (vdp == NULL) 1631 return (ENXIO); 1632 1633 mutex_enter(&vdp->xdf_dev_lk); 1634 *capp = vdp->xdf_pgeom.g_capacity; 1635 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1636 mutex_exit(&vdp->xdf_dev_lk); 1637 return (0); 1638 } 1639 1640 static int 1641 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1642 { 1643 xdf_t *vdp; 1644 1645 if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL) 1646 return (ENXIO); 1647 *geomp = vdp->xdf_pgeom; 1648 return (0); 1649 } 1650 1651 /* 1652 * No real HBA, no geometry available from it 1653 */ 1654 /*ARGSUSED*/ 1655 static int 1656 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1657 { 1658 return (EINVAL); 1659 } 1660 1661 static int 1662 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1663 { 1664 xdf_t *vdp; 1665 1666 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1667 return (ENXIO); 1668 1669 if (XD_IS_RO(vdp)) 1670 tgattributep->media_is_writable = 0; 1671 else 1672 tgattributep->media_is_writable = 1; 1673 return (0); 1674 } 1675 1676 /* ARGSUSED3 */ 1677 int 1678 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1679 { 1680 switch (cmd) { 1681 case TG_GETPHYGEOM: 1682 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1683 case TG_GETVIRTGEOM: 1684 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1685 case TG_GETCAPACITY: 1686 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1687 case TG_GETBLOCKSIZE: 1688 *(uint32_t *)arg = XB_BSIZE; 1689 return (0); 1690 case TG_GETATTR: 1691 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1692 default: 1693 return (ENOTTY); 1694 } 1695 } 1696 1697 /* 1698 * Kick-off connect process 1699 * Status should be XD_UNKNOWN or XD_CLOSED 1700 * On success, status will be changed to XD_INIT 1701 * On error, status won't be changed 1702 */ 1703 static int 1704 xdf_start_connect(xdf_t *vdp) 1705 { 1706 char *xsnode; 1707 grant_ref_t gref; 1708 xenbus_transaction_t xbt; 1709 int rv; 1710 dev_info_t *dip = vdp->xdf_dip; 1711 1712 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1713 goto errout; 1714 1715 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1716 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1717 ddi_get_name_addr(dip)); 1718 goto errout; 1719 } 1720 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1721 #ifdef XPV_HVM_DRIVER 1722 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1723 
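	/*
	 * In HVM mode the event channel is bound directly to xdf_intr();
	 * the PV path below registers the handler via ddi_add_intr().
	 */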
#else /* !XPV_HVM_DRIVER */ 1724 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1725 DDI_SUCCESS) { 1726 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1727 "failed to add intr handler", ddi_get_name_addr(dip)); 1728 goto errout1; 1729 } 1730 #endif /* !XPV_HVM_DRIVER */ 1731 1732 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1733 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1734 DDI_SUCCESS) { 1735 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1736 ddi_get_name_addr(dip)); 1737 goto errout2; 1738 } 1739 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1740 1741 /* 1742 * Write into xenstore the info needed by backend 1743 */ 1744 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1745 cmn_err(CE_WARN, "xdf@%s: " 1746 "failed to get xenstore node path", 1747 ddi_get_name_addr(dip)); 1748 goto fail_trans; 1749 } 1750 trans_retry: 1751 if (xenbus_transaction_start(&xbt)) { 1752 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1753 ddi_get_name_addr(dip)); 1754 xvdi_fatal_error(dip, EIO, "transaction start"); 1755 goto fail_trans; 1756 } 1757 1758 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1759 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1760 ddi_get_name_addr(dip)); 1761 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1762 goto abort_trans; 1763 } 1764 1765 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1766 vdp->xdf_evtchn)) { 1767 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1768 ddi_get_name_addr(dip)); 1769 xvdi_fatal_error(dip, rv, "writing event-channel"); 1770 goto abort_trans; 1771 } 1772 1773 /* 1774 * "protocol" is written by the domain builder in the case of PV 1775 * domains. However, it is not written for HVM domains, so let's 1776 * write it here. 
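	 * The backend reads this key to learn which ring ABI the frontend
	 * is using; we advertise XEN_IO_PROTO_ABI_NATIVE.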
1777 */ 1778 if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", 1779 XEN_IO_PROTO_ABI_NATIVE)) { 1780 cmn_err(CE_WARN, "xdf@%s: failed to write protocol", 1781 ddi_get_name_addr(dip)); 1782 xvdi_fatal_error(dip, rv, "writing protocol"); 1783 goto abort_trans; 1784 } 1785 1786 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1787 cmn_err(CE_WARN, "xdf@%s: " 1788 "failed to switch state to XenbusStateInitialised", 1789 ddi_get_name_addr(dip)); 1790 xvdi_fatal_error(dip, rv, "writing state"); 1791 goto abort_trans; 1792 } 1793 1794 /* kick-off connect process */ 1795 if (rv = xenbus_transaction_end(xbt, 0)) { 1796 if (rv == EAGAIN) 1797 goto trans_retry; 1798 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1799 ddi_get_name_addr(dip)); 1800 xvdi_fatal_error(dip, rv, "completing transaction"); 1801 goto fail_trans; 1802 } 1803 1804 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1805 mutex_enter(&vdp->xdf_dev_lk); 1806 vdp->xdf_status = XD_INIT; 1807 mutex_exit(&vdp->xdf_dev_lk); 1808 1809 return (DDI_SUCCESS); 1810 1811 abort_trans: 1812 (void) xenbus_transaction_end(xbt, 1); 1813 fail_trans: 1814 xvdi_free_ring(vdp->xdf_xb_ring); 1815 errout2: 1816 #ifdef XPV_HVM_DRIVER 1817 ec_unbind_evtchn(vdp->xdf_evtchn); 1818 #else /* !XPV_HVM_DRIVER */ 1819 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1820 #endif /* !XPV_HVM_DRIVER */ 1821 errout1: 1822 xvdi_free_evtchn(dip); 1823 errout: 1824 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1825 ddi_get_name_addr(dip)); 1826 return (DDI_FAILURE); 1827 } 1828 1829 /* 1830 * Kick-off disconnect process 1831 * Status won't be changed 1832 */ 1833 static int 1834 xdf_start_disconnect(xdf_t *vdp) 1835 { 1836 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1837 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1838 ddi_get_name_addr(vdp->xdf_dip)); 1839 return (DDI_FAILURE); 1840 } 1841 1842 return (DDI_SUCCESS); 1843 } 1844 1845 int 1846 xdf_get_flush_block(xdf_t *vdp) 1847 { 1848 /* 1849 * Get a DEV_BSIZE aligned bufer 1850 */ 1851 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1852 vdp->xdf_cache_flush_block = 1853 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1854 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1855 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1856 return (DDI_FAILURE); 1857 return (DDI_SUCCESS); 1858 } 1859 1860 /* 1861 * Finish other initialization after we've connected to backend 1862 * Status should be XD_INIT before calling this routine 1863 * On success, status should be changed to XD_READY 1864 * On error, status should stay XD_INIT 1865 */ 1866 static int 1867 xdf_post_connect(xdf_t *vdp) 1868 { 1869 int rv; 1870 uint_t len; 1871 char *type; 1872 char *barrier; 1873 dev_info_t *devi = vdp->xdf_dip; 1874 1875 /* 1876 * Determine if feature barrier is supported by backend 1877 */ 1878 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1879 "feature-barrier", (void **)&barrier, &len) == 0) { 1880 vdp->xdf_feature_barrier = 1; 1881 kmem_free(barrier, len); 1882 } else { 1883 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1884 ddi_get_name_addr(vdp->xdf_dip)); 1885 vdp->xdf_feature_barrier = 0; 1886 } 1887 1888 /* probe backend */ 1889 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1890 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1891 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1892 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1893 "cannot read backend info", ddi_get_name_addr(devi)); 1894 
xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the device we're connecting isn't smaller than
	 * the old connected device.
	 */
	if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "backend disk device shrank", ddi_get_name_addr(devi));
		/* XXX: call xvdi_fatal_error() here? */
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Only update the physical geometry to reflect the new device
	 * size if this is the first time we're connecting to the backend
	 * device.  Once we assign a physical geometry to a device it stays
	 * fixed until:
	 *	- we get detached and re-attached (at which point we
	 *	  automatically assign a new physical geometry).
	 *	- someone calls TG_SETPHYGEOM to explicitly set the
	 *	  physical geometry.
	 */
	if (vdp->xdf_pgeom.g_capacity == 0)
		xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/* fix disk type */
	if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type",
	    (void **)&type, &len) != 0) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "cannot read device-type", ddi_get_name_addr(devi));
		xvdi_fatal_error(devi, rv, "reading device-type");
		return (DDI_FAILURE);
	}
	if (strcmp(type, "cdrom") == 0)
		vdp->xdf_xdev_info |= VDISK_CDROM;
	kmem_free(type, len);

	/*
	 * We've created all the minor nodes via cmlb_attach() using default
	 * values in xdf_attach() so that it is possible to block in
	 * xdf_open() in case anyone (say, a booting thread) tries to open
	 * the device before we are connected to the backend.  Now that we
	 * are almost connected, refresh those minor nodes with the latest
	 * info we've got from the backend.
	 *
	 * Don't do this when xdf is already open (which can happen during
	 * resume), because cmlb_attach() would invalidate the label info
	 * and confuse whoever already has the node open.
	 */
	if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) {
		/* re-init cmlb w/ latest info we got from backend */
		if (cmlb_attach(devi, &xdf_lb_ops,
		    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
		    XD_IS_RM(vdp), 1,
		    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
		    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
		    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
		    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
		    vdp->xdf_vd_lbl, NULL) != 0) {
			cmn_err(CE_WARN, "xdf@%s: cmlb attach failed",
			    ddi_get_name_addr(devi));
			return (DDI_FAILURE);
		}
	}

	/* mark the vbd as ready for I/O */
	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_READY;
	mutex_exit(&vdp->xdf_dev_lk);
	/*
	 * If the backend has feature-barrier, see if it supports the disk
	 * cache flush op.
	 */
	vdp->xdf_flush_supported = 0;
	if (vdp->xdf_feature_barrier) {
		/*
		 * Pretend we already know flush is supported so the probe
		 * will attempt the correct op.
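		 * The probe below issues a zero-length TG_WRITE through
		 * xdf_lb_rdwr().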
1979 */ 1980 vdp->xdf_flush_supported = 1; 1981 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1982 vdp->xdf_flush_supported = 1; 1983 } else { 1984 vdp->xdf_flush_supported = 0; 1985 /* 1986 * If the other end does not support the cache flush op 1987 * then we must use a barrier-write to force disk 1988 * cache flushing. Barrier writes require that a data 1989 * block actually be written. 1990 * Cache a block to barrier-write when we are 1991 * asked to perform a flush. 1992 * XXX - would it be better to just copy 1 block 1993 * (512 bytes) from whatever write we did last 1994 * and rewrite that block? 1995 */ 1996 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1997 return (DDI_FAILURE); 1998 } 1999 } 2000 2001 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 2002 (uint64_t)vdp->xdf_xdev_nblocks); 2003 2004 return (DDI_SUCCESS); 2005 } 2006 2007 /* 2008 * Finish other uninitialization after we've disconnected from backend 2009 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 2010 */ 2011 static void 2012 xdf_post_disconnect(xdf_t *vdp) 2013 { 2014 #ifdef XPV_HVM_DRIVER 2015 ec_unbind_evtchn(vdp->xdf_evtchn); 2016 #else /* !XPV_HVM_DRIVER */ 2017 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 2018 #endif /* !XPV_HVM_DRIVER */ 2019 xvdi_free_evtchn(vdp->xdf_dip); 2020 xvdi_free_ring(vdp->xdf_xb_ring); 2021 vdp->xdf_xb_ring = NULL; 2022 vdp->xdf_xb_ring_hdl = NULL; 2023 vdp->xdf_peer = (domid_t)-1; 2024 2025 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 2026 mutex_enter(&vdp->xdf_dev_lk); 2027 vdp->xdf_status = XD_CLOSED; 2028 mutex_exit(&vdp->xdf_dev_lk); 2029 } 2030 2031 /*ARGSUSED*/ 2032 static void 2033 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 2034 { 2035 XenbusState new_state = *(XenbusState *)impl_data; 2036 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2037 boolean_t unexpect_die = B_FALSE; 2038 int status; 2039 2040 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 2041 ddi_get_name_addr(dip), new_state)); 2042 2043 mutex_enter(&vdp->xdf_cb_lk); 2044 2045 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 2046 mutex_exit(&vdp->xdf_cb_lk); 2047 return; 2048 } 2049 2050 switch (new_state) { 2051 case XenbusStateInitialising: 2052 ASSERT(vdp->xdf_status == XD_CLOSED); 2053 /* 2054 * backend recovered from a previous failure, 2055 * kick-off connect process again 2056 */ 2057 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 2058 cmn_err(CE_WARN, "xdf@%s:" 2059 " failed to start reconnecting to backend", 2060 ddi_get_name_addr(dip)); 2061 } 2062 break; 2063 case XenbusStateConnected: 2064 ASSERT(vdp->xdf_status == XD_INIT); 2065 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 2066 /* finish final init after connect */ 2067 if (xdf_post_connect(vdp) != DDI_SUCCESS) 2068 (void) xdf_start_disconnect(vdp); 2069 break; 2070 case XenbusStateClosing: 2071 if (vdp->xdf_status == XD_READY) { 2072 mutex_enter(&vdp->xdf_dev_lk); 2073 if (xdf_isopen(vdp, -1)) { 2074 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 2075 "still in use", ddi_get_name_addr(dip)); 2076 mutex_exit(&vdp->xdf_dev_lk); 2077 break; 2078 } else { 2079 vdp->xdf_status = XD_CLOSING; 2080 } 2081 mutex_exit(&vdp->xdf_dev_lk); 2082 } 2083 (void) xdf_start_disconnect(vdp); 2084 break; 2085 case XenbusStateClosed: 2086 /* first check if BE closed unexpectedly */ 2087 mutex_enter(&vdp->xdf_dev_lk); 2088 if (xdf_isopen(vdp, -1)) { 2089 unexpect_die = B_TRUE; 2090 unexpectedie(vdp); 2091 cmn_err(CE_WARN, 
"xdf@%s: backend closed, " 2092 "reconnecting...", ddi_get_name_addr(dip)); 2093 } 2094 mutex_exit(&vdp->xdf_dev_lk); 2095 2096 if (vdp->xdf_status == XD_READY) { 2097 mutex_enter(&vdp->xdf_dev_lk); 2098 vdp->xdf_status = XD_CLOSING; 2099 mutex_exit(&vdp->xdf_dev_lk); 2100 2101 #ifdef DOMU_BACKEND 2102 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 2103 #endif 2104 2105 xdf_post_disconnect(vdp); 2106 (void) xvdi_switch_state(dip, XBT_NULL, 2107 XenbusStateClosed); 2108 } else if ((vdp->xdf_status == XD_INIT) || 2109 (vdp->xdf_status == XD_CLOSING)) { 2110 xdf_post_disconnect(vdp); 2111 } else { 2112 mutex_enter(&vdp->xdf_dev_lk); 2113 vdp->xdf_status = XD_CLOSED; 2114 mutex_exit(&vdp->xdf_dev_lk); 2115 } 2116 } 2117 2118 /* notify anybody waiting for oe state change */ 2119 mutex_enter(&vdp->xdf_dev_lk); 2120 cv_broadcast(&vdp->xdf_dev_cv); 2121 mutex_exit(&vdp->xdf_dev_lk); 2122 2123 status = vdp->xdf_status; 2124 mutex_exit(&vdp->xdf_cb_lk); 2125 2126 if (status == XD_READY) { 2127 xdf_iostart(vdp); 2128 } else if ((status == XD_CLOSED) && !unexpect_die) { 2129 /* interface is closed successfully, remove all minor nodes */ 2130 if (vdp->xdf_vd_lbl != NULL) { 2131 cmlb_detach(vdp->xdf_vd_lbl, NULL); 2132 cmlb_free_handle(&vdp->xdf_vd_lbl); 2133 vdp->xdf_vd_lbl = NULL; 2134 } 2135 } 2136 } 2137 2138 /* check if partition is open, -1 - check all partitions on the disk */ 2139 static boolean_t 2140 xdf_isopen(xdf_t *vdp, int partition) 2141 { 2142 int i; 2143 ulong_t parbit; 2144 boolean_t rval = B_FALSE; 2145 2146 ASSERT((partition == -1) || 2147 ((partition >= 0) || (partition < XDF_PEXT))); 2148 2149 if (partition == -1) 2150 parbit = (ulong_t)-1; 2151 else 2152 parbit = 1 << partition; 2153 2154 for (i = 0; i < OTYPCNT; i++) { 2155 if (vdp->xdf_vd_open[i] & parbit) 2156 rval = B_TRUE; 2157 } 2158 2159 return (rval); 2160 } 2161 2162 /* 2163 * Xdf_check_state_transition will check the XenbusState change to see 2164 * if the change is a valid transition or not. 
 * The new state is written by the backend domain, or by running
 * xenstore-write to change it manually in dom0.
 */
static int
xdf_check_state_transition(xdf_t *vdp, XenbusState oestate)
{
    int status;
    int stcheck;
#define STOK  0 /* needs further processing */
#define STNOP 1 /* no action needs to be taken */
#define STBUG 2 /* unexpected state change, could be a bug */

    status = vdp->xdf_status;
    stcheck = STOK;

    switch (status) {
    case XD_UNKNOWN:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateConnected))
            stcheck = STBUG;
        else if ((oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised))
            stcheck = STNOP;
        break;
    case XD_INIT:
        if (oestate == XenbusStateUnknown)
            stcheck = STBUG;
        else if ((oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised))
            stcheck = STNOP;
        break;
    case XD_READY:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised))
            stcheck = STBUG;
        else if (oestate == XenbusStateConnected)
            stcheck = STNOP;
        break;
    case XD_CLOSING:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised) ||
            (oestate == XenbusStateConnected))
            stcheck = STBUG;
        else if (oestate == XenbusStateClosing)
            stcheck = STNOP;
        break;
    case XD_CLOSED:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateConnected))
            stcheck = STBUG;
        else if ((oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised) ||
            (oestate == XenbusStateClosing) ||
            (oestate == XenbusStateClosed))
            stcheck = STNOP;
        break;
    case XD_SUSPEND:
    default:
        stcheck = STBUG;
    }

    if (stcheck == STOK)
        return (DDI_SUCCESS);

    if (stcheck == STBUG)
        cmn_err(CE_NOTE, "xdf@%s: unexpected otherend "
            "state change to %d when status is %d",
            ddi_get_name_addr(vdp->xdf_dip), oestate, status);

    return (DDI_FAILURE);
}

static int
xdf_connect(xdf_t *vdp, boolean_t wait)
{
    ASSERT(mutex_owned(&vdp->xdf_dev_lk));
    while (vdp->xdf_status != XD_READY) {
        if (!wait || (vdp->xdf_status > XD_READY))
            break;

        if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
            break;
    }

    return (vdp->xdf_status);
}

/*
 * Callback function invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
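 *
 * Both the DMA callbacks registered in vreq_setup() and the grant table
 * free callback registered in gs_get() point at xdf_dmacallback(), which
 * simply fires the driver's soft interrupt; the soft interrupt handler,
 * xdf_iorestart(), clears the callback-pending flag and calls xdf_iostart()
 * to retry the stalled I/O.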
 */
static int
xdf_dmacallback(caddr_t arg)
{
    xdf_t *vdp = (xdf_t *)arg;
    ASSERT(vdp != NULL);

    DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
        ddi_get_name_addr(vdp->xdf_dip)));

    ddi_trigger_softintr(vdp->xdf_softintr_id);
    return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
    xdf_t *vdp = (xdf_t *)arg;

    ASSERT(vdp != NULL);

    mutex_enter(&vdp->xdf_dev_lk);
    ASSERT(ISDMACBON(vdp));
    SETDMACBOFF(vdp);
    mutex_exit(&vdp->xdf_dev_lk);

    xdf_iostart(vdp);

    return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
    xdf_t *vdp = arg;

    mutex_enter(&vdp->xdf_dev_lk);
    vdp->xdf_timeout_id = 0;
    mutex_exit(&vdp->xdf_dev_lk);

    /* a new timeout thread could now be re-scheduled */
    xdf_iostart(vdp);
}

/*
 * Allocate a vreq for this bp.
 * bp->av_back contains the pointer to the vreq upon return.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = NULL;

    ASSERT(BP2VREQ(bp) == NULL);

    vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
    if (vreq == NULL) {
        if (vdp->xdf_timeout_id == 0)
            /* restart I/O after one second */
            vdp->xdf_timeout_id =
                timeout(xdf_timeout_handler, vdp, hz);
        return (NULL);
    }
    bzero(vreq, sizeof (v_req_t));

    list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
    bp->av_back = (buf_t *)vreq;
    vreq->v_buf = bp;
    vreq->v_status = VREQ_INIT;
    /* init of other fields in vreq is up to the caller */

    return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
    buf_t *bp = vreq->v_buf;

    list_remove(&vdp->xdf_vreq_act, (void *)vreq);

    if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
        goto done;

    switch (vreq->v_status) {
    case VREQ_DMAWIN_DONE:
    case VREQ_GS_ALLOCED:
    case VREQ_DMABUF_BOUND:
        (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
        /*FALLTHRU*/
    case VREQ_DMAMEM_ALLOCED:
        if (!ALIGNED_XFER(bp)) {
            ASSERT(vreq->v_abuf != NULL);
            if (!IS_ERROR(bp) && IS_READ(bp))
                bcopy(vreq->v_abuf, bp->b_un.b_addr,
                    bp->b_bcount);
            ddi_dma_mem_free(&vreq->v_align);
        }
        /*FALLTHRU*/
    case VREQ_MEMDMAHDL_ALLOCED:
        if (!ALIGNED_XFER(bp))
            ddi_dma_free_handle(&vreq->v_memdmahdl);
        /*FALLTHRU*/
    case VREQ_DMAHDL_ALLOCED:
        ddi_dma_free_handle(&vreq->v_dmahdl);
        break;
    default:
        break;
    }
done:
    vreq->v_buf->av_back = NULL;
    kmem_cache_free(xdf_vreq_cache, vreq);
}

/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
    int rc;
    ddi_dma_attr_t dmaattr;
    uint_t ndcs, ndws;
    ddi_dma_handle_t dh;
    ddi_dma_handle_t mdh;
    ddi_dma_cookie_t dc;
    ddi_acc_handle_t abh;
    caddr_t aba;
    ge_slot_t *gs;
    size_t bufsz;
    off_t off;
    size_t sz;
    buf_t *bp = vreq->v_buf;
    int dma_flags = (IS_READ(bp) ?
        DDI_DMA_READ : DDI_DMA_WRITE) |
        DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

    switch (vreq->v_status) {
    case VREQ_INIT:
        if (IS_FLUSH_DISKCACHE(bp)) {
            if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
                DPRINTF(DMA_DBG, (
                    "xdf@%s: get ge_slot failed\n",
                    ddi_get_name_addr(vdp->xdf_dip)));
                return (DDI_FAILURE);
            }
            vreq->v_blkno = 0;
            vreq->v_nslots = 1;
            vreq->v_gs = gs;
            vreq->v_flush_diskcache = FLUSH_DISKCACHE;
            vreq->v_status = VREQ_GS_ALLOCED;
            gs->vreq = vreq;
            return (DDI_SUCCESS);
        }

        if (IS_WRITE_BARRIER(vdp, bp))
            vreq->v_flush_diskcache = WRITE_BARRIER;
        vreq->v_blkno = bp->b_blkno +
            (diskaddr_t)(uintptr_t)bp->b_private;
        bp->b_private = NULL;
        /* See if we wrote new data to our flush block */
        if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
            check_fbwrite(vdp, bp, vreq->v_blkno);
        vreq->v_status = VREQ_INIT_DONE;
        /*FALLTHRU*/

    case VREQ_INIT_DONE:
        /*
         * alloc DMA handle
         */
        rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
            xdf_dmacallback, (caddr_t)vdp, &dh);
        if (rc != DDI_SUCCESS) {
            SETDMACBON(vdp);
            DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_dmahdl = dh;
        vreq->v_status = VREQ_DMAHDL_ALLOCED;
        /*FALLTHRU*/

    case VREQ_DMAHDL_ALLOCED:
        /*
         * alloc dma handle for 512-byte aligned buf
         */
        if (!ALIGNED_XFER(bp)) {
            /*
             * XXPV: we need to temporarily enlarge the seg
             * boundary and s/g length to work around CR6381968
             */
            dmaattr = xb_dma_attr;
            dmaattr.dma_attr_seg = (uint64_t)-1;
            dmaattr.dma_attr_sgllen = INT_MAX;
            rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
                xdf_dmacallback, (caddr_t)vdp, &mdh);
            if (rc != DDI_SUCCESS) {
                SETDMACBON(vdp);
                DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
                    "handle alloc failed\n",
                    ddi_get_name_addr(vdp->xdf_dip)));
                return (DDI_FAILURE);
            }
            vreq->v_memdmahdl = mdh;
            vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
        }
        /*FALLTHRU*/

    case VREQ_MEMDMAHDL_ALLOCED:
        /*
         * alloc 512-byte aligned buf
         */
        if (!ALIGNED_XFER(bp)) {
            if (bp->b_flags & (B_PAGEIO | B_PHYS))
                bp_mapin(bp);

            rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
                roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
                DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
                &aba, &bufsz, &abh);
            if (rc != DDI_SUCCESS) {
                SETDMACBON(vdp);
                DPRINTF(DMA_DBG, (
                    "xdf@%s: DMA mem allocation failed\n",
                    ddi_get_name_addr(vdp->xdf_dip)));
                return (DDI_FAILURE);
            }

            vreq->v_abuf = aba;
            vreq->v_align = abh;
            vreq->v_status = VREQ_DMAMEM_ALLOCED;

            ASSERT(bufsz >= bp->b_bcount);
            if (!IS_READ(bp))
                bcopy(bp->b_un.b_addr, vreq->v_abuf,
                    bp->b_bcount);
        }
        /*FALLTHRU*/

    case VREQ_DMAMEM_ALLOCED:
        /*
         * dma bind
         */
        if (ALIGNED_XFER(bp)) {
            rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
                dma_flags, xdf_dmacallback, (caddr_t)vdp,
                &dc, &ndcs);
        } else {
            rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
                NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
                xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
        }
        if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
            /* get num of dma windows */
            if (rc == DDI_DMA_PARTIAL_MAP) {
                rc = ddi_dma_numwin(vreq->v_dmahdl,
                    &ndws);
                ASSERT(rc == DDI_SUCCESS);
            } else {
                ndws = 1;
            }
        } else {
            SETDMACBON(vdp);
            DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_dmac = dc;
        vreq->v_dmaw = 0;
        vreq->v_ndmacs = ndcs;
        vreq->v_ndmaws = ndws;
        vreq->v_nslots = ndws;
        vreq->v_status = VREQ_DMABUF_BOUND;
        /*FALLTHRU*/

    case VREQ_DMABUF_BOUND:
        /*
         * get a ge_slot; the callback is set upon failure from
         * gs_get(), if not set previously
         */
        if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
            DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_gs = gs;
        gs->vreq = vreq;
        vreq->v_status = VREQ_GS_ALLOCED;
        break;

    case VREQ_GS_ALLOCED:
        /* nothing needs to be done */
        break;

    case VREQ_DMAWIN_DONE:
        /*
         * move to the next dma window
         */
        ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

        /* get a ge_slot for this DMA window */
        if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
            DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_gs = gs;
        gs->vreq = vreq;
        vreq->v_dmaw++;
        rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
            &vreq->v_dmac, &vreq->v_ndmacs);
        ASSERT(rc == DDI_SUCCESS);
        vreq->v_status = VREQ_GS_ALLOCED;
        break;

    default:
        return (DDI_FAILURE);
    }

    return (DDI_SUCCESS);
}

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
    grant_ref_t gh;
    ge_slot_t *gs;

    /* first, try to alloc the GTEs needed in this slot */
    if (gnttab_alloc_grant_references(
        BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
        if (vdp->xdf_gnt_callback.next == NULL) {
            SETDMACBON(vdp);
            gnttab_request_free_callback(
                &vdp->xdf_gnt_callback,
                (void (*)(void *))xdf_dmacallback,
                (void *)vdp,
                BLKIF_MAX_SEGMENTS_PER_REQUEST);
        }
        return (NULL);
    }

    gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
    if (gs == NULL) {
        gnttab_free_grant_references(gh);
        if (vdp->xdf_timeout_id == 0)
            /* restart I/O after one second */
            vdp->xdf_timeout_id =
                timeout(xdf_timeout_handler, vdp, hz);
        return (NULL);
    }

    /* init the ge_slot */
    list_insert_head(&vdp->xdf_gs_act, (void *)gs);
    gs->oeid = vdp->xdf_peer;
    gs->isread = isread;
    gs->ghead = gh;
    gs->ngrefs = 0;

    return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
    int i;
    grant_ref_t *gp = gs->ge;
    int ngrefs = gs->ngrefs;
    boolean_t isread = gs->isread;

    list_remove(&vdp->xdf_gs_act, (void *)gs);

    /* release all grant table entry resources used in this slot */
    for (i = 0; i < ngrefs; i++, gp++)
        gnttab_end_foreign_access(*gp, !isread, 0);
    gnttab_free_grant_references(gs->ghead);

    kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
    grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

    ASSERT(gr != -1);
    ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
    gs->ge[gs->ngrefs++] = gr;
    gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

    return (gr);
}

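/*
 * Illustrative sketch only (not compiled): roughly how a ge_slot obtained
 * from gs_get() and the grant references handed out by gs_grant() could be
 * used to fill in the segment array of a blkif request.  The real consumer
 * of these helpers is xdf_prepare_rreq(); the helper below, its name, and
 * the flat mfn array are assumptions made purely for illustration, and
 * partial first/last sectors are not handled here.
 */
#if 0	/* example only */
static void
example_fill_segments(xdf_t *vdp, blkif_request_t *rreq, mfn_t *mfns,
    int nsegs, int isread)
{
    ge_slot_t *gs;
    int i;

    /* one ge_slot covers up to BLKIF_MAX_SEGMENTS_PER_REQUEST grants */
    if ((gs = gs_get(vdp, isread)) == NULL)
        return;		/* caller would retry once resources free up */

    rreq->nr_segments = (uint8_t)nsegs;
    for (i = 0; i < nsegs; i++) {
        /* grant the backend access to the page backing this segment */
        rreq->seg[i].gref = gs_grant(gs, mfns[i]);
        rreq->seg[i].first_sect = 0;
        rreq->seg[i].last_sect = 7;	/* 8 x 512-byte sectors per page */
    }
}
#endif
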
static void
unexpectedie(xdf_t *vdp)
{
    /* clean up I/Os in the ring that have responses */
    if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
        mutex_exit(&vdp->xdf_dev_lk);
        (void) xdf_intr((caddr_t)vdp);
        mutex_enter(&vdp->xdf_dev_lk);
    }

    /* free up all grant table entries */
    while (!list_is_empty(&vdp->xdf_gs_act))
        gs_free(vdp, list_head(&vdp->xdf_gs_act));

    /*
     * move the bufs back to the active list, preserving order;
     * vreq_busy is updated in vreq_free()
     */
    while (!list_is_empty(&vdp->xdf_vreq_act)) {
        v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
        buf_t *bp = vreq->v_buf;

        bp->av_back = NULL;
        bp->b_resid = bp->b_bcount;
        if (vdp->xdf_f_act == NULL) {
            vdp->xdf_f_act = vdp->xdf_l_act = bp;
        } else {
            /* move to the head of the list */
            bp->av_forw = vdp->xdf_f_act;
            vdp->xdf_f_act = bp;
        }
        if (vdp->xdf_xdev_iostat != NULL)
            kstat_runq_back_to_waitq(
                KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
        vreq_free(vdp, vreq);
    }
}

static void
xdfmin(struct buf *bp)
{
    if (bp->b_bcount > xdf_maxphys)
        bp->b_bcount = xdf_maxphys;
}

void
xdf_kstat_delete(dev_info_t *dip)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    kstat_t *kstat;

    /*
     * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
     * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
     * and the contents of our kstat.  xdf_iostat_lk is used
     * to protect the allocation and freeing of the actual kstat.
     * xdf_dev_lk can't be used for this purpose because kstat
     * readers use it to access the contents of the kstat and
     * hence it can't be held when calling kstat_delete().
     */
    mutex_enter(&vdp->xdf_iostat_lk);
    mutex_enter(&vdp->xdf_dev_lk);

    if (vdp->xdf_xdev_iostat == NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return;
    }

    kstat = vdp->xdf_xdev_iostat;
    vdp->xdf_xdev_iostat = NULL;
    mutex_exit(&vdp->xdf_dev_lk);

    kstat_delete(kstat);
    mutex_exit(&vdp->xdf_iostat_lk);
}

int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

    /* See the comment about locking in xdf_kstat_delete(). */
    mutex_enter(&vdp->xdf_iostat_lk);
    mutex_enter(&vdp->xdf_dev_lk);

    if (vdp->xdf_xdev_iostat != NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return (-1);
    }

    if ((vdp->xdf_xdev_iostat = kstat_create(
        ks_module, ks_instance, NULL, "disk",
        KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return (-1);
    }

    vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
    kstat_install(vdp->xdf_xdev_iostat);
    mutex_exit(&vdp->xdf_dev_lk);
    mutex_exit(&vdp->xdf_iostat_lk);

    return (0);
}

#if defined(XPV_HVM_DRIVER)

typedef struct xdf_hvm_entry {
    list_node_t	xdf_he_list;
    char		*xdf_he_path;
    dev_info_t	*xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;

static xdf_hvm_entry_t *
i_xdf_hvm_find(char *path, dev_info_t *dip)
{
    xdf_hvm_entry_t *i;

    ASSERT((path != NULL) || (dip != NULL));
    ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

    i = list_head(&xdf_hvm_list);
    while (i != NULL) {
        if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
            i = list_next(&xdf_hvm_list, i);
            continue;
        }
        if ((dip != NULL) && (i->xdf_he_dip != dip)) {
            i = list_next(&xdf_hvm_list, i);
            continue;
        }
        break;
    }
    return (i);
}

dev_info_t *
xdf_hvm_hold(char *path)
{
    xdf_hvm_entry_t *i;
    dev_info_t *dip;

    mutex_enter(&xdf_hvm_list_lock);
    i = i_xdf_hvm_find(path, NULL);
    if (i == NULL) {
        mutex_exit(&xdf_hvm_list_lock);
        return (NULL);
    }
    ndi_hold_devi(dip = i->xdf_he_dip);
    mutex_exit(&xdf_hvm_list_lock);
    return (dip);
}

static void
xdf_hvm_add(dev_info_t *dip)
{
    xdf_hvm_entry_t *i;
    char *path;

    /* figure out the path for the dip */
    path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
    (void) ddi_pathname(dip, path);

    i = kmem_alloc(sizeof (*i), KM_SLEEP);
    i->xdf_he_dip = dip;
    i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

    mutex_enter(&xdf_hvm_list_lock);
    ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
    ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
    list_insert_head(&xdf_hvm_list, i);
    mutex_exit(&xdf_hvm_list_lock);

    kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
    xdf_hvm_entry_t *i;

    mutex_enter(&xdf_hvm_list_lock);
    VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
    list_remove(&xdf_hvm_list, i);
    mutex_exit(&xdf_hvm_list_lock);

    kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
    kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
    list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
        offsetof(xdf_hvm_entry_t, xdf_he_list));
    mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
    ASSERT(list_head(&xdf_hvm_list) == NULL);
    list_destroy(&xdf_hvm_list);
    mutex_destroy(&xdf_hvm_list_lock);
}

int
xdf_hvm_connect(dev_info_t *dip)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    int rv;

    /* do cv_wait until connected or failed */
    mutex_enter(&vdp->xdf_dev_lk);
    rv = xdf_connect(vdp, B_TRUE);
    mutex_exit(&vdp->xdf_dev_lk);
    return ((rv == XD_READY) ? 0 : -1);
}

int
xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

    /* sanity check the requested physical geometry */
    mutex_enter(&vdp->xdf_dev_lk);
    if ((geomp->g_secsize != XB_BSIZE) ||
        (geomp->g_capacity == 0)) {
        mutex_exit(&vdp->xdf_dev_lk);
        return (EINVAL);
    }

    /*
     * If we've already connected to the backend device then make sure
     * we're not defining a physical geometry larger than our backend
     * device.
     */
    if ((vdp->xdf_xdev_nblocks != 0) &&
        (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
        mutex_exit(&vdp->xdf_dev_lk);
        return (EINVAL);
    }

    vdp->xdf_pgeom = *geomp;
    mutex_exit(&vdp->xdf_dev_lk);

    /* force a re-validation */
    cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

    return (0);
}

#endif /* XPV_HVM_DRIVER */
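
/*
 * Illustrative sketch only (not compiled): how an HVM consumer of this
 * driver (for example, an emulated-disk driver handing a device over to
 * xdf) might use xdf_hvm_hold(), xdf_hvm_connect() and xdf_hvm_setpgeom().
 * The caller's name, the xenstore path argument, and the error handling
 * shown here are assumptions made purely for illustration.
 */
#if 0	/* example only */
static int
example_handover(char *xenbus_path, cmlb_geom_t *pgeom)
{
    dev_info_t *dip;

    /* look up the xdf instance for this xenstore path and hold it */
    if ((dip = xdf_hvm_hold(xenbus_path)) == NULL)
        return (ENODEV);

    /* wait for the frontend/backend handshake to reach XD_READY */
    if (xdf_hvm_connect(dip) != 0) {
        ndi_rele_devi(dip);
        return (EIO);
    }

    /* propagate the physical geometry reported by the emulated device */
    if (xdf_hvm_setpgeom(dip, pgeom) != 0) {
        ndi_rele_devi(dip);
        return (EINVAL);
    }

    ndi_rele_devi(dip);
    return (0);
}
#endif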