/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/cmlb.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
#endif /* XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <io/xdf.h>

#define	FLUSH_DISKCACHE	0x1
#define	WRITE_BARRIER	0x2
#define	DEFAULT_FLUSH_BLOCK	156	/* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp) \
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp) \
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp) \
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp) \
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

static void *vbd_ss;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
int xdfdebug = 0;
extern int do_polled_io;
diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
int xdf_barrier_flush_disable = 0;

/*
 * dev_ops and cb_ops entrypoints
 */
static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
static int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
static int xdf_reset(dev_info_t *, ddi_reset_cmd_t);
static int xdf_open(dev_t *, int, int, cred_t *);
static int xdf_close(dev_t, int, int, struct cred *);
static int xdf_strategy(struct buf *);
static int xdf_read(dev_t, struct uio *, cred_t *);
static int xdf_aread(dev_t, struct aio_req *, cred_t *);
static int xdf_write(dev_t, struct uio *, cred_t *);
static int xdf_awrite(dev_t, struct aio_req *, cred_t *);
static int xdf_dump(dev_t, caddr_t, daddr_t, int);
static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static uint_t xdf_intr(caddr_t);
static int xdf_prop_op(dev_t, dev_info_t *,
ddi_prop_op_t, int, char *, 97 caddr_t, int *); 98 99 /* 100 * misc private functions 101 */ 102 static int xdf_suspend(dev_info_t *); 103 static int xdf_resume(dev_info_t *); 104 static int xdf_start_connect(xdf_t *); 105 static int xdf_start_disconnect(xdf_t *); 106 static int xdf_post_connect(xdf_t *); 107 static void xdf_post_disconnect(xdf_t *); 108 static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); 109 static void xdf_iostart(xdf_t *); 110 static void xdf_iofini(xdf_t *, uint64_t, int); 111 static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *); 112 static int xdf_drain_io(xdf_t *); 113 static boolean_t xdf_isopen(xdf_t *, int); 114 static int xdf_check_state_transition(xdf_t *, XenbusState); 115 static int xdf_connect(xdf_t *, boolean_t); 116 static int xdf_dmacallback(caddr_t); 117 static void xdf_timeout_handler(void *); 118 static uint_t xdf_iorestart(caddr_t); 119 static v_req_t *vreq_get(xdf_t *, buf_t *); 120 static void vreq_free(xdf_t *, v_req_t *); 121 static int vreq_setup(xdf_t *, v_req_t *); 122 static ge_slot_t *gs_get(xdf_t *, int); 123 static void gs_free(xdf_t *, ge_slot_t *); 124 static grant_ref_t gs_grant(ge_slot_t *, mfn_t); 125 static void unexpectedie(xdf_t *); 126 static void xdfmin(struct buf *); 127 static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *); 128 extern int xdf_kstat_create(dev_info_t *, char *, int); 129 extern void xdf_kstat_delete(dev_info_t *); 130 131 #if defined(XPV_HVM_DRIVER) 132 static void xdf_hvm_add(dev_info_t *); 133 static void xdf_hvm_rm(dev_info_t *); 134 static void xdf_hvm_init(void); 135 static void xdf_hvm_fini(void); 136 #endif /* XPV_HVM_DRIVER */ 137 138 static struct cb_ops xdf_cbops = { 139 xdf_open, 140 xdf_close, 141 xdf_strategy, 142 nodev, 143 xdf_dump, 144 xdf_read, 145 xdf_write, 146 xdf_ioctl, 147 nodev, 148 nodev, 149 nodev, 150 nochpoll, 151 xdf_prop_op, 152 NULL, 153 D_MP | D_NEW | D_64BIT, 154 CB_REV, 155 xdf_aread, 156 xdf_awrite 157 }; 158 159 struct dev_ops xdf_devops = { 160 DEVO_REV, /* devo_rev */ 161 0, /* devo_refcnt */ 162 xdf_getinfo, /* devo_getinfo */ 163 nulldev, /* devo_identify */ 164 nulldev, /* devo_probe */ 165 xdf_attach, /* devo_attach */ 166 xdf_detach, /* devo_detach */ 167 xdf_reset, /* devo_reset */ 168 &xdf_cbops, /* devo_cb_ops */ 169 (struct bus_ops *)NULL /* devo_bus_ops */ 170 }; 171 172 static struct modldrv modldrv = { 173 &mod_driverops, /* Type of module. 
This one is a driver */ 174 "virtual block driver %I%", /* short description */ 175 &xdf_devops /* driver specific ops */ 176 }; 177 178 static struct modlinkage xdf_modlinkage = { 179 MODREV_1, (void *)&modldrv, NULL 180 }; 181 182 /* 183 * I/O buffer DMA attributes 184 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 185 */ 186 static ddi_dma_attr_t xb_dma_attr = { 187 DMA_ATTR_V0, 188 (uint64_t)0, /* lowest address */ 189 (uint64_t)0xffffffffffffffff, /* highest usable address */ 190 (uint64_t)0xffffff, /* DMA counter limit max */ 191 (uint64_t)XB_BSIZE, /* alignment in bytes */ 192 XB_BSIZE - 1, /* bitmap of burst sizes */ 193 XB_BSIZE, /* min transfer */ 194 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 195 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 196 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 197 XB_BSIZE, /* granularity */ 198 0, /* flags (reserved) */ 199 }; 200 201 static ddi_device_acc_attr_t xc_acc_attr = { 202 DDI_DEVICE_ATTR_V0, 203 DDI_NEVERSWAP_ACC, 204 DDI_STRICTORDER_ACC 205 }; 206 207 /* callbacks from commmon label */ 208 209 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 210 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 211 212 static cmlb_tg_ops_t xdf_lb_ops = { 213 TG_DK_OPS_VERSION_1, 214 xdf_lb_rdwr, 215 xdf_lb_getinfo 216 }; 217 218 int 219 _init(void) 220 { 221 int rc; 222 223 if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0) 224 return (rc); 225 226 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 227 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 228 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 229 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 230 231 #if defined(XPV_HVM_DRIVER) 232 xdf_hvm_init(); 233 #endif /* XPV_HVM_DRIVER */ 234 235 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 236 #if defined(XPV_HVM_DRIVER) 237 xdf_hvm_fini(); 238 #endif /* XPV_HVM_DRIVER */ 239 kmem_cache_destroy(xdf_vreq_cache); 240 kmem_cache_destroy(xdf_gs_cache); 241 ddi_soft_state_fini(&vbd_ss); 242 return (rc); 243 } 244 245 return (rc); 246 } 247 248 int 249 _fini(void) 250 { 251 252 int err; 253 if ((err = mod_remove(&xdf_modlinkage)) != 0) 254 return (err); 255 256 #if defined(XPV_HVM_DRIVER) 257 xdf_hvm_fini(); 258 #endif /* XPV_HVM_DRIVER */ 259 260 kmem_cache_destroy(xdf_vreq_cache); 261 kmem_cache_destroy(xdf_gs_cache); 262 ddi_soft_state_fini(&vbd_ss); 263 264 return (0); 265 } 266 267 int 268 _info(struct modinfo *modinfop) 269 { 270 return (mod_info(&xdf_modlinkage, modinfop)); 271 } 272 273 /*ARGSUSED*/ 274 static int 275 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 276 { 277 int instance; 278 xdf_t *vbdp; 279 280 instance = XDF_INST(getminor((dev_t)arg)); 281 282 switch (cmd) { 283 case DDI_INFO_DEVT2DEVINFO: 284 if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { 285 *rp = NULL; 286 return (DDI_FAILURE); 287 } 288 *rp = vbdp->xdf_dip; 289 return (DDI_SUCCESS); 290 291 case DDI_INFO_DEVT2INSTANCE: 292 *rp = (void *)(uintptr_t)instance; 293 return (DDI_SUCCESS); 294 295 default: 296 return (DDI_FAILURE); 297 } 298 } 299 300 static int 301 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 302 char *name, caddr_t valuep, int *lengthp) 303 { 304 int instance = ddi_get_instance(dip); 305 xdf_t *vdp; 306 diskaddr_t p_blkcnt; 307 308 /* 309 * xdf dynamic properties are device specific and size oriented. 
310 * Requests issued under conditions where size is valid are passed 311 * to ddi_prop_op_nblocks with the size information, otherwise the 312 * request is passed to ddi_prop_op. 313 */ 314 vdp = ddi_get_soft_state(vbd_ss, instance); 315 316 if ((dev == DDI_DEV_T_ANY) || (vdp == NULL)) 317 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 318 name, valuep, lengthp)); 319 320 /* do cv_wait until connected or failed */ 321 mutex_enter(&vdp->xdf_dev_lk); 322 if (xdf_connect(vdp, B_TRUE) != XD_READY) { 323 mutex_exit(&vdp->xdf_dev_lk); 324 goto out; 325 } 326 mutex_exit(&vdp->xdf_dev_lk); 327 328 if (cmlb_partinfo(vdp->xdf_vd_lbl, XDF_PART(getminor(dev)), &p_blkcnt, 329 NULL, NULL, NULL, NULL) == 0) 330 return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 331 name, valuep, lengthp, (uint64_t)p_blkcnt)); 332 333 out: 334 return (ddi_prop_op(dev, dip, prop_op, mod_flags, name, valuep, 335 lengthp)); 336 } 337 338 static int 339 xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 340 { 341 xdf_t *vdp; 342 ddi_iblock_cookie_t softibc; 343 int instance; 344 345 xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, 346 "xdfdebug", 0); 347 348 switch (cmd) { 349 case DDI_ATTACH: 350 break; 351 352 case DDI_RESUME: 353 return (xdf_resume(devi)); 354 355 default: 356 return (DDI_FAILURE); 357 } 358 359 instance = ddi_get_instance(devi); 360 if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) 361 return (DDI_FAILURE); 362 363 DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); 364 vdp = ddi_get_soft_state(vbd_ss, instance); 365 ddi_set_driver_private(devi, vdp); 366 vdp->xdf_dip = devi; 367 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 368 369 if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) { 370 cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", 371 ddi_get_name_addr(devi)); 372 goto errout0; 373 } 374 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); 375 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); 376 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, 377 (void *)vdp->xdf_ibc); 378 379 if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) 380 != DDI_SUCCESS) { 381 cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", 382 ddi_get_name_addr(devi)); 383 goto errout0; 384 } 385 if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 386 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 387 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 388 ddi_get_name_addr(devi)); 389 goto errout0; 390 } 391 392 #if !defined(XPV_HVM_DRIVER) 393 /* create kstat for iostat(1M) */ 394 if (xdf_kstat_create(devi, "xdf", instance) != 0) { 395 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 396 ddi_get_name_addr(devi)); 397 goto errout0; 398 } 399 #endif /* !XPV_HVM_DRIVER */ 400 401 /* driver handles kernel-issued IOCTLs */ 402 if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, 403 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 404 cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", 405 ddi_get_name_addr(devi)); 406 goto errout0; 407 } 408 409 /* 410 * Initialize the physical geometry stucture. Note that currently 411 * we don't know the size of the backend device so the number 412 * of blocks on the device will be initialized to zero. Once 413 * we connect to the backend device we'll update the physical 414 * geometry to reflect the real size of the device. 
415 */ 416 xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); 417 418 /* 419 * create default device minor nodes: non-removable disk 420 * we will adjust minor nodes after we are connected w/ backend 421 */ 422 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 423 if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, DDI_NT_BLOCK, 424 #if defined(XPV_HVM_DRIVER) 425 CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | 426 CMLB_INTERNAL_MINOR_NODES, 427 #else /* !XPV_HVM_DRIVER */ 428 CMLB_FAKE_LABEL_ONE_PARTITION, 429 #endif /* !XPV_HVM_DRIVER */ 430 vdp->xdf_vd_lbl, NULL) != 0) { 431 cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", 432 ddi_get_name_addr(devi)); 433 goto errout0; 434 } 435 436 /* 437 * We ship with cache-enabled disks 438 */ 439 vdp->xdf_wce = 1; 440 441 mutex_enter(&vdp->xdf_cb_lk); 442 443 /* Watch backend XenbusState change */ 444 if (xvdi_add_event_handler(devi, XS_OE_STATE, 445 xdf_oe_change) != DDI_SUCCESS) { 446 mutex_exit(&vdp->xdf_cb_lk); 447 goto errout0; 448 } 449 450 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 451 cmn_err(CE_WARN, "xdf@%s: start connection failed", 452 ddi_get_name_addr(devi)); 453 (void) xdf_start_disconnect(vdp); 454 mutex_exit(&vdp->xdf_cb_lk); 455 goto errout1; 456 } 457 458 mutex_exit(&vdp->xdf_cb_lk); 459 460 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 461 offsetof(v_req_t, v_link)); 462 list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), 463 offsetof(ge_slot_t, link)); 464 465 #if defined(XPV_HVM_DRIVER) 466 xdf_hvm_add(devi); 467 468 (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1); 469 470 /* 471 * Report our version to dom0. 472 */ 473 if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", 474 HVMPV_XDF_VERS)) 475 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 476 #endif /* XPV_HVM_DRIVER */ 477 478 ddi_report_dev(devi); 479 480 DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); 481 482 return (DDI_SUCCESS); 483 484 errout1: 485 xvdi_remove_event_handler(devi, XS_OE_STATE); 486 errout0: 487 if (vdp->xdf_vd_lbl != NULL) { 488 cmlb_detach(vdp->xdf_vd_lbl, NULL); 489 cmlb_free_handle(&vdp->xdf_vd_lbl); 490 } 491 #if !defined(XPV_HVM_DRIVER) 492 xdf_kstat_delete(devi); 493 #endif /* !XPV_HVM_DRIVER */ 494 if (vdp->xdf_softintr_id != NULL) 495 ddi_remove_softintr(vdp->xdf_softintr_id); 496 if (vdp->xdf_ibc != NULL) { 497 mutex_destroy(&vdp->xdf_cb_lk); 498 mutex_destroy(&vdp->xdf_dev_lk); 499 } 500 cv_destroy(&vdp->xdf_dev_cv); 501 ddi_soft_state_free(vbd_ss, instance); 502 ddi_set_driver_private(devi, NULL); 503 ddi_prop_remove_all(devi); 504 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); 505 return (DDI_FAILURE); 506 } 507 508 static int 509 xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 510 { 511 xdf_t *vdp; 512 int instance; 513 514 switch (cmd) { 515 516 case DDI_PM_SUSPEND: 517 break; 518 519 case DDI_SUSPEND: 520 return (xdf_suspend(devi)); 521 522 case DDI_DETACH: 523 break; 524 525 default: 526 return (DDI_FAILURE); 527 } 528 529 instance = ddi_get_instance(devi); 530 DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); 531 vdp = ddi_get_soft_state(vbd_ss, instance); 532 533 if (vdp == NULL) 534 return (DDI_FAILURE); 535 536 mutex_enter(&vdp->xdf_dev_lk); 537 if (xdf_isopen(vdp, -1)) { 538 mutex_exit(&vdp->xdf_dev_lk); 539 return (DDI_FAILURE); 540 } 541 542 if (vdp->xdf_status != XD_CLOSED) { 543 mutex_exit(&vdp->xdf_dev_lk); 544 return (DDI_FAILURE); 545 } 546 547 #if defined(XPV_HVM_DRIVER) 548 xdf_hvm_rm(devi); 549 #endif /* XPV_HVM_DRIVER */ 550 551 ASSERT(!ISDMACBON(vdp)); 552 
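	/*
	 * At this point the checks above have verified that no partition
	 * is open, the state is XD_CLOSED and no DMA callback is armed,
	 * so the teardown below cannot race with active I/O.
	 */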
mutex_exit(&vdp->xdf_dev_lk); 553 554 if (vdp->xdf_timeout_id != 0) 555 (void) untimeout(vdp->xdf_timeout_id); 556 557 xvdi_remove_event_handler(devi, XS_OE_STATE); 558 559 /* we'll support backend running in domU later */ 560 #ifdef DOMU_BACKEND 561 (void) xvdi_post_event(devi, XEN_HP_REMOVE); 562 #endif 563 564 list_destroy(&vdp->xdf_vreq_act); 565 list_destroy(&vdp->xdf_gs_act); 566 ddi_prop_remove_all(devi); 567 xdf_kstat_delete(devi); 568 ddi_remove_softintr(vdp->xdf_softintr_id); 569 ddi_set_driver_private(devi, NULL); 570 cv_destroy(&vdp->xdf_dev_cv); 571 mutex_destroy(&vdp->xdf_cb_lk); 572 mutex_destroy(&vdp->xdf_dev_lk); 573 if (vdp->xdf_cache_flush_block != NULL) 574 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 575 ddi_soft_state_free(vbd_ss, instance); 576 return (DDI_SUCCESS); 577 } 578 579 static int 580 xdf_suspend(dev_info_t *devi) 581 { 582 xdf_t *vdp; 583 int instance; 584 enum xdf_state st; 585 586 instance = ddi_get_instance(devi); 587 588 if (xdfdebug & SUSRES_DBG) 589 xen_printf("xdf_suspend: xdf#%d\n", instance); 590 591 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 592 return (DDI_FAILURE); 593 594 xvdi_suspend(devi); 595 596 mutex_enter(&vdp->xdf_cb_lk); 597 mutex_enter(&vdp->xdf_dev_lk); 598 st = vdp->xdf_status; 599 /* change status to stop further I/O requests */ 600 if (st == XD_READY) 601 vdp->xdf_status = XD_SUSPEND; 602 mutex_exit(&vdp->xdf_dev_lk); 603 mutex_exit(&vdp->xdf_cb_lk); 604 605 /* make sure no more I/O responses left in the ring buffer */ 606 if ((st == XD_INIT) || (st == XD_READY)) { 607 #ifdef XPV_HVM_DRIVER 608 ec_unbind_evtchn(vdp->xdf_evtchn); 609 xvdi_free_evtchn(devi); 610 #else /* !XPV_HVM_DRIVER */ 611 (void) ddi_remove_intr(devi, 0, NULL); 612 #endif /* !XPV_HVM_DRIVER */ 613 (void) xdf_drain_io(vdp); 614 /* 615 * no need to teardown the ring buffer here 616 * it will be simply re-init'ed during resume when 617 * we call xvdi_alloc_ring 618 */ 619 } 620 621 if (xdfdebug & SUSRES_DBG) 622 xen_printf("xdf_suspend: SUCCESS\n"); 623 624 return (DDI_SUCCESS); 625 } 626 627 /*ARGSUSED*/ 628 static int 629 xdf_resume(dev_info_t *devi) 630 { 631 xdf_t *vdp; 632 int instance; 633 634 instance = ddi_get_instance(devi); 635 if (xdfdebug & SUSRES_DBG) 636 xen_printf("xdf_resume: xdf%d\n", instance); 637 638 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 639 return (DDI_FAILURE); 640 641 mutex_enter(&vdp->xdf_cb_lk); 642 643 if (xvdi_resume(devi) != DDI_SUCCESS) { 644 mutex_exit(&vdp->xdf_cb_lk); 645 return (DDI_FAILURE); 646 } 647 648 mutex_enter(&vdp->xdf_dev_lk); 649 ASSERT(vdp->xdf_status != XD_READY); 650 vdp->xdf_status = XD_UNKNOWN; 651 mutex_exit(&vdp->xdf_dev_lk); 652 653 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 654 mutex_exit(&vdp->xdf_cb_lk); 655 return (DDI_FAILURE); 656 } 657 658 mutex_exit(&vdp->xdf_cb_lk); 659 660 if (xdfdebug & SUSRES_DBG) 661 xen_printf("xdf_resume: done\n"); 662 return (DDI_SUCCESS); 663 } 664 665 /*ARGSUSED*/ 666 static int 667 xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 668 { 669 xdf_t *vdp; 670 int instance; 671 672 instance = ddi_get_instance(devi); 673 DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); 674 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 675 return (DDI_FAILURE); 676 677 /* 678 * wait for any outstanding I/O to complete 679 */ 680 (void) xdf_drain_io(vdp); 681 682 DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); 683 return (DDI_SUCCESS); 684 } 685 686 static int 687 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 688 { 689 minor_t 
minor; 690 xdf_t *vdp; 691 int part; 692 ulong_t parbit; 693 diskaddr_t p_blkct = 0; 694 boolean_t firstopen; 695 boolean_t nodelay; 696 697 minor = getminor(*devp); 698 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 699 return (ENXIO); 700 701 nodelay = (flag & (FNDELAY | FNONBLOCK)); 702 703 DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); 704 705 /* do cv_wait until connected or failed */ 706 mutex_enter(&vdp->xdf_dev_lk); 707 if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { 708 mutex_exit(&vdp->xdf_dev_lk); 709 return (ENXIO); 710 } 711 712 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 713 mutex_exit(&vdp->xdf_dev_lk); 714 return (EROFS); 715 } 716 717 part = XDF_PART(minor); 718 parbit = 1 << part; 719 if ((vdp->xdf_vd_exclopen & parbit) || 720 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 721 mutex_exit(&vdp->xdf_dev_lk); 722 return (EBUSY); 723 } 724 725 /* are we the first one to open this node? */ 726 firstopen = !xdf_isopen(vdp, -1); 727 728 if (otyp == OTYP_LYR) 729 vdp->xdf_vd_lyropen[part]++; 730 731 vdp->xdf_vd_open[otyp] |= parbit; 732 733 if (flag & FEXCL) 734 vdp->xdf_vd_exclopen |= parbit; 735 736 mutex_exit(&vdp->xdf_dev_lk); 737 738 /* force a re-validation */ 739 if (firstopen) 740 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 741 742 /* 743 * check size 744 * ignore CD/DVD which contains a zero-sized s0 745 */ 746 if (!nodelay && !XD_IS_CD(vdp) && 747 ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 748 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { 749 (void) xdf_close(*devp, flag, otyp, credp); 750 return (ENXIO); 751 } 752 753 return (0); 754 } 755 756 /*ARGSUSED*/ 757 static int 758 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 759 { 760 minor_t minor; 761 xdf_t *vdp; 762 int part; 763 ulong_t parbit; 764 765 minor = getminor(dev); 766 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 767 return (ENXIO); 768 769 mutex_enter(&vdp->xdf_dev_lk); 770 part = XDF_PART(minor); 771 if (!xdf_isopen(vdp, part)) { 772 mutex_exit(&vdp->xdf_dev_lk); 773 return (ENXIO); 774 } 775 parbit = 1 << part; 776 777 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 778 if (otyp == OTYP_LYR) { 779 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 780 if (--vdp->xdf_vd_lyropen[part] == 0) 781 vdp->xdf_vd_open[otyp] &= ~parbit; 782 } else { 783 vdp->xdf_vd_open[otyp] &= ~parbit; 784 } 785 vdp->xdf_vd_exclopen &= ~parbit; 786 787 mutex_exit(&vdp->xdf_dev_lk); 788 return (0); 789 } 790 791 static int 792 xdf_strategy(struct buf *bp) 793 { 794 xdf_t *vdp; 795 minor_t minor; 796 diskaddr_t p_blkct, p_blkst; 797 ulong_t nblks; 798 int part; 799 800 minor = getminor(bp->b_edev); 801 part = XDF_PART(minor); 802 803 vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)); 804 if ((vdp == NULL) || !xdf_isopen(vdp, part)) { 805 bioerror(bp, ENXIO); 806 bp->b_resid = bp->b_bcount; 807 biodone(bp); 808 return (0); 809 } 810 811 /* Check for writes to a read only device */ 812 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 813 bioerror(bp, EROFS); 814 bp->b_resid = bp->b_bcount; 815 biodone(bp); 816 return (0); 817 } 818 819 /* Check if this I/O is accessing a partition or the entire disk */ 820 if ((long)bp->b_private == XB_SLICE_NONE) { 821 /* This I/O is using an absolute offset */ 822 p_blkct = vdp->xdf_xdev_nblocks; 823 p_blkst = 0; 824 } else { 825 /* This I/O is using a partition relative offset */ 826 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 827 &p_blkst, NULL, NULL, NULL)) { 828 bioerror(bp, ENXIO); 829 bp->b_resid = bp->b_bcount; 830 biodone(bp); 831 
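			/*
			 * strategy(9E) reports errors through the buf
			 * (bioerror()/biodone() above) and still returns 0.
			 */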
return (0); 832 } 833 } 834 835 /* check for a starting block beyond the disk or partition limit */ 836 if (bp->b_blkno > p_blkct) { 837 DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, 838 (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 839 bioerror(bp, EINVAL); 840 bp->b_resid = bp->b_bcount; 841 biodone(bp); 842 return (0); 843 } 844 845 /* Legacy: don't set error flag at this case */ 846 if (bp->b_blkno == p_blkct) { 847 bp->b_resid = bp->b_bcount; 848 biodone(bp); 849 return (0); 850 } 851 852 /* Adjust for partial transfer */ 853 nblks = bp->b_bcount >> XB_BSHIFT; 854 if ((bp->b_blkno + nblks) > p_blkct) { 855 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 856 bp->b_bcount -= bp->b_resid; 857 } 858 859 DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", 860 (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 861 862 /* Fix up the buf struct */ 863 bp->b_flags |= B_BUSY; 864 bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */ 865 bp->b_private = (void *)(uintptr_t)p_blkst; 866 867 mutex_enter(&vdp->xdf_dev_lk); 868 if (vdp->xdf_xdev_iostat != NULL) 869 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 870 if (vdp->xdf_f_act == NULL) { 871 vdp->xdf_f_act = vdp->xdf_l_act = bp; 872 } else { 873 vdp->xdf_l_act->av_forw = bp; 874 vdp->xdf_l_act = bp; 875 } 876 mutex_exit(&vdp->xdf_dev_lk); 877 878 xdf_iostart(vdp); 879 if (do_polled_io) 880 (void) xdf_drain_io(vdp); 881 return (0); 882 } 883 884 /*ARGSUSED*/ 885 static int 886 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 887 { 888 889 xdf_t *vdp; 890 minor_t minor; 891 diskaddr_t p_blkcnt; 892 int part; 893 894 minor = getminor(dev); 895 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 896 return (ENXIO); 897 898 DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", 899 (int64_t)uiop->uio_offset)); 900 901 part = XDF_PART(minor); 902 if (!xdf_isopen(vdp, part)) 903 return (ENXIO); 904 905 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 906 NULL, NULL, NULL, NULL)) 907 return (ENXIO); 908 909 if (U_INVAL(uiop)) 910 return (EINVAL); 911 912 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 913 } 914 915 /*ARGSUSED*/ 916 static int 917 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 918 { 919 xdf_t *vdp; 920 minor_t minor; 921 diskaddr_t p_blkcnt; 922 int part; 923 924 minor = getminor(dev); 925 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 926 return (ENXIO); 927 928 DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", 929 (int64_t)uiop->uio_offset)); 930 931 part = XDF_PART(minor); 932 if (!xdf_isopen(vdp, part)) 933 return (ENXIO); 934 935 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 936 NULL, NULL, NULL, NULL)) 937 return (ENXIO); 938 939 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 940 return (ENOSPC); 941 942 if (U_INVAL(uiop)) 943 return (EINVAL); 944 945 return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); 946 } 947 948 /*ARGSUSED*/ 949 static int 950 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 951 { 952 xdf_t *vdp; 953 minor_t minor; 954 struct uio *uiop = aiop->aio_uio; 955 diskaddr_t p_blkcnt; 956 int part; 957 958 minor = getminor(dev); 959 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 960 return (ENXIO); 961 962 part = XDF_PART(minor); 963 if (!xdf_isopen(vdp, part)) 964 return (ENXIO); 965 966 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 967 NULL, NULL, NULL, NULL)) 968 return (ENXIO); 969 970 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 971 return (ENOSPC); 972 973 if 
(U_INVAL(uiop)) 974 return (EINVAL); 975 976 return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); 977 } 978 979 /*ARGSUSED*/ 980 static int 981 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 982 { 983 xdf_t *vdp; 984 minor_t minor; 985 struct uio *uiop = aiop->aio_uio; 986 diskaddr_t p_blkcnt; 987 int part; 988 989 minor = getminor(dev); 990 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 991 return (ENXIO); 992 993 part = XDF_PART(minor); 994 if (!xdf_isopen(vdp, part)) 995 return (ENXIO); 996 997 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 998 NULL, NULL, NULL, NULL)) 999 return (ENXIO); 1000 1001 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 1002 return (ENOSPC); 1003 1004 if (U_INVAL(uiop)) 1005 return (EINVAL); 1006 1007 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); 1008 } 1009 1010 static int 1011 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1012 { 1013 struct buf dumpbuf, *dbp; 1014 xdf_t *vdp; 1015 minor_t minor; 1016 int err = 0; 1017 int part; 1018 diskaddr_t p_blkcnt, p_blkst; 1019 1020 minor = getminor(dev); 1021 if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) 1022 return (ENXIO); 1023 1024 DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", 1025 addr, blkno, nblk)); 1026 1027 part = XDF_PART(minor); 1028 if (!xdf_isopen(vdp, part)) 1029 return (ENXIO); 1030 1031 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 1032 NULL, NULL, NULL)) 1033 return (ENXIO); 1034 1035 if ((blkno + nblk) > p_blkcnt) { 1036 cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, 1037 blkno + nblk, (uint64_t)p_blkcnt); 1038 return (EINVAL); 1039 } 1040 1041 dbp = &dumpbuf; 1042 bioinit(dbp); 1043 dbp->b_flags = B_BUSY; 1044 dbp->b_un.b_addr = addr; 1045 dbp->b_bcount = nblk << DEV_BSHIFT; 1046 dbp->b_blkno = blkno; 1047 dbp->b_edev = dev; 1048 dbp->b_private = (void *)(uintptr_t)p_blkst; 1049 1050 mutex_enter(&vdp->xdf_dev_lk); 1051 if (vdp->xdf_xdev_iostat != NULL) 1052 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1053 if (vdp->xdf_f_act == NULL) { 1054 vdp->xdf_f_act = vdp->xdf_l_act = dbp; 1055 } else { 1056 vdp->xdf_l_act->av_forw = dbp; 1057 vdp->xdf_l_act = dbp; 1058 } 1059 dbp->av_forw = NULL; 1060 dbp->av_back = NULL; 1061 mutex_exit(&vdp->xdf_dev_lk); 1062 xdf_iostart(vdp); 1063 err = xdf_drain_io(vdp); 1064 biofini(dbp); 1065 return (err); 1066 } 1067 1068 /*ARGSUSED*/ 1069 static int 1070 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1071 int *rvalp) 1072 { 1073 int instance; 1074 xdf_t *vdp; 1075 minor_t minor; 1076 int part; 1077 1078 minor = getminor(dev); 1079 instance = XDF_INST(minor); 1080 1081 if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) 1082 return (ENXIO); 1083 1084 DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", 1085 instance, cmd, cmd)); 1086 1087 part = XDF_PART(minor); 1088 if (!xdf_isopen(vdp, part)) 1089 return (ENXIO); 1090 1091 switch (cmd) { 1092 case DKIOCGMEDIAINFO: { 1093 struct dk_minfo media_info; 1094 1095 media_info.dki_lbsize = DEV_BSIZE; 1096 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 1097 media_info.dki_media_type = DK_FIXED_DISK; 1098 1099 if (ddi_copyout(&media_info, (void *)arg, 1100 sizeof (struct dk_minfo), mode)) { 1101 return (EFAULT); 1102 } else { 1103 return (0); 1104 } 1105 } 1106 1107 case DKIOCINFO: { 1108 struct dk_cinfo info; 1109 1110 /* controller information */ 1111 if (XD_IS_CD(vdp)) 1112 info.dki_ctype = DKC_CDROM; 1113 else 1114 info.dki_ctype 
= DKC_VBD; 1115 1116 info.dki_cnum = 0; 1117 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 1118 1119 /* unit information */ 1120 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 1121 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 1122 info.dki_flags = DKI_FMTVOL; 1123 info.dki_partition = part; 1124 info.dki_maxtransfer = maxphys / DEV_BSIZE; 1125 info.dki_addr = 0; 1126 info.dki_space = 0; 1127 info.dki_prio = 0; 1128 info.dki_vec = 0; 1129 1130 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 1131 return (EFAULT); 1132 else 1133 return (0); 1134 } 1135 1136 case DKIOCSTATE: { 1137 enum dkio_state dkstate = DKIO_INSERTED; 1138 if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), 1139 mode) != 0) 1140 return (EFAULT); 1141 return (0); 1142 } 1143 1144 /* 1145 * is media removable? 1146 */ 1147 case DKIOCREMOVABLE: { 1148 int i = XD_IS_RM(vdp) ? 1 : 0; 1149 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) 1150 return (EFAULT); 1151 return (0); 1152 } 1153 1154 case DKIOCG_PHYGEOM: 1155 case DKIOCG_VIRTGEOM: 1156 case DKIOCGGEOM: 1157 case DKIOCSGEOM: 1158 case DKIOCGAPART: 1159 case DKIOCSAPART: 1160 case DKIOCGVTOC: 1161 case DKIOCSVTOC: 1162 case DKIOCPARTINFO: 1163 case DKIOCGMBOOT: 1164 case DKIOCSMBOOT: 1165 case DKIOCGETEFI: 1166 case DKIOCSETEFI: 1167 case DKIOCPARTITION: { 1168 int rc; 1169 1170 rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 1171 rvalp, NULL); 1172 return (rc); 1173 } 1174 1175 case DKIOCGETWCE: 1176 if (ddi_copyout(&vdp->xdf_wce, (void *)arg, 1177 sizeof (vdp->xdf_wce), mode)) 1178 return (EFAULT); 1179 return (0); 1180 case DKIOCSETWCE: 1181 if (ddi_copyin((void *)arg, &vdp->xdf_wce, 1182 sizeof (vdp->xdf_wce), mode)) 1183 return (EFAULT); 1184 return (0); 1185 case DKIOCFLUSHWRITECACHE: { 1186 int rc; 1187 struct dk_callback *dkc = (struct dk_callback *)arg; 1188 1189 if (vdp->xdf_flush_supported) { 1190 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1191 NULL, 0, 0, (void *)dev); 1192 } else if (vdp->xdf_feature_barrier && 1193 !xdf_barrier_flush_disable) { 1194 rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 1195 vdp->xdf_cache_flush_block, xdf_flush_block, 1196 DEV_BSIZE, (void *)dev); 1197 } else { 1198 return (ENOTTY); 1199 } 1200 if ((mode & FKIOCTL) && (dkc != NULL) && 1201 (dkc->dkc_callback != NULL)) { 1202 (*dkc->dkc_callback)(dkc->dkc_cookie, rc); 1203 /* need to return 0 after calling callback */ 1204 rc = 0; 1205 } 1206 return (rc); 1207 } 1208 1209 default: 1210 return (ENOTTY); 1211 } 1212 } 1213 1214 /* 1215 * xdf interrupt handler 1216 */ 1217 static uint_t 1218 xdf_intr(caddr_t arg) 1219 { 1220 xdf_t *vdp = (xdf_t *)arg; 1221 xendev_ring_t *xbr; 1222 blkif_response_t *resp; 1223 int bioerr; 1224 uint64_t id; 1225 extern int do_polled_io; 1226 uint8_t op; 1227 uint16_t status; 1228 ddi_acc_handle_t acchdl; 1229 1230 mutex_enter(&vdp->xdf_dev_lk); 1231 1232 if ((xbr = vdp->xdf_xb_ring) == NULL) { 1233 mutex_exit(&vdp->xdf_dev_lk); 1234 return (DDI_INTR_UNCLAIMED); 1235 } 1236 1237 acchdl = vdp->xdf_xb_ring_hdl; 1238 1239 /* 1240 * complete all requests which have a response 1241 */ 1242 while (resp = xvdi_ring_get_response(xbr)) { 1243 id = ddi_get64(acchdl, &resp->id); 1244 op = ddi_get8(acchdl, &resp->operation); 1245 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 1246 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 1247 op, id, status)); 1248 1249 /* 1250 * XXPV - close connection to the backend and restart 1251 */ 1252 if (status != BLKIF_RSP_OKAY) { 1253 DPRINTF(IO_DBG, ("xdf@%s: I/O error 
while %s", 1254 ddi_get_name_addr(vdp->xdf_dip), 1255 (op == BLKIF_OP_READ) ? "reading" : "writing")); 1256 bioerr = EIO; 1257 } else { 1258 bioerr = 0; 1259 } 1260 1261 xdf_iofini(vdp, id, bioerr); 1262 } 1263 1264 mutex_exit(&vdp->xdf_dev_lk); 1265 1266 if (!do_polled_io) 1267 xdf_iostart(vdp); 1268 1269 return (DDI_INTR_CLAIMED); 1270 } 1271 1272 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1273 1274 /* 1275 * Snarf new data if our flush block was re-written 1276 */ 1277 static void 1278 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1279 { 1280 int nblks; 1281 boolean_t mapin; 1282 1283 if (IS_WRITE_BARRIER(vdp, bp)) 1284 return; /* write was a flush write */ 1285 1286 mapin = B_FALSE; 1287 nblks = bp->b_bcount >> DEV_BSHIFT; 1288 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1289 xdf_fbrewrites++; 1290 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1291 mapin = B_TRUE; 1292 bp_mapin(bp); 1293 } 1294 bcopy(bp->b_un.b_addr + 1295 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1296 vdp->xdf_cache_flush_block, DEV_BSIZE); 1297 if (mapin) 1298 bp_mapout(bp); 1299 } 1300 } 1301 1302 static void 1303 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1304 { 1305 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1306 v_req_t *vreq = gs->vreq; 1307 buf_t *bp = vreq->v_buf; 1308 1309 gs_free(vdp, gs); 1310 if (bioerr) 1311 bioerror(bp, bioerr); 1312 vreq->v_nslots--; 1313 if (vreq->v_nslots != 0) 1314 return; 1315 1316 XDF_UPDATE_IO_STAT(vdp, bp); 1317 if (vdp->xdf_xdev_iostat != NULL) 1318 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1319 1320 if (IS_ERROR(bp)) 1321 bp->b_resid = bp->b_bcount; 1322 1323 vreq_free(vdp, vreq); 1324 biodone(bp); 1325 } 1326 1327 /* 1328 * return value of xdf_prepare_rreq() 1329 * used in xdf_iostart() 1330 */ 1331 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1332 #define XF_COMP 1 /* no more I/O left in buf */ 1333 1334 static void 1335 xdf_iostart(xdf_t *vdp) 1336 { 1337 xendev_ring_t *xbr; 1338 struct buf *bp; 1339 blkif_request_t *rreq; 1340 int retval; 1341 int rreqready = 0; 1342 1343 xbr = vdp->xdf_xb_ring; 1344 1345 /* 1346 * populate the ring request(s) 1347 * 1348 * loop until there is no buf to transfer or no free slot 1349 * available in I/O ring 1350 */ 1351 mutex_enter(&vdp->xdf_dev_lk); 1352 1353 for (;;) { 1354 if (vdp->xdf_status != XD_READY) 1355 break; 1356 1357 /* active buf queue empty? 
*/ 1358 if ((bp = vdp->xdf_f_act) == NULL) 1359 break; 1360 1361 /* try to grab a vreq for this bp */ 1362 if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) 1363 break; 1364 /* alloc DMA/GTE resources */ 1365 if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) 1366 break; 1367 1368 /* get next blkif_request in the ring */ 1369 if ((rreq = xvdi_ring_get_request(xbr)) == NULL) 1370 break; 1371 bzero(rreq, sizeof (blkif_request_t)); 1372 1373 /* populate blkif_request with this buf */ 1374 rreqready++; 1375 retval = xdf_prepare_rreq(vdp, bp, rreq); 1376 if (retval == XF_COMP) { 1377 /* finish this bp, switch to next one */ 1378 if (vdp->xdf_xdev_iostat != NULL) 1379 kstat_waitq_to_runq( 1380 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1381 vdp->xdf_f_act = bp->av_forw; 1382 bp->av_forw = NULL; 1383 } 1384 } 1385 1386 /* 1387 * Send the request(s) to the backend 1388 */ 1389 if (rreqready) { 1390 if (xvdi_ring_push_request(xbr)) { 1391 DPRINTF(IO_DBG, ("xdf_iostart: " 1392 "sent request(s) to backend\n")); 1393 xvdi_notify_oe(vdp->xdf_dip); 1394 } 1395 } 1396 1397 mutex_exit(&vdp->xdf_dev_lk); 1398 } 1399 1400 /* 1401 * populate a single blkif_request_t w/ a buf 1402 */ 1403 static int 1404 xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1405 { 1406 int rval; 1407 grant_ref_t gr; 1408 uint8_t fsect, lsect; 1409 size_t bcnt; 1410 paddr_t dma_addr; 1411 off_t blk_off; 1412 dev_info_t *dip = vdp->xdf_dip; 1413 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1414 v_req_t *vreq = BP2VREQ(bp); 1415 uint64_t blkno = vreq->v_blkno; 1416 uint_t ndmacs = vreq->v_ndmacs; 1417 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1418 int seg = 0; 1419 int isread = IS_READ(bp); 1420 1421 if (isread) 1422 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1423 else { 1424 switch (vreq->v_flush_diskcache) { 1425 case FLUSH_DISKCACHE: 1426 ddi_put8(acchdl, &rreq->operation, 1427 BLKIF_OP_FLUSH_DISKCACHE); 1428 ddi_put16(acchdl, &rreq->handle, vdev); 1429 ddi_put64(acchdl, &rreq->id, 1430 (uint64_t)(uintptr_t)(vreq->v_gs)); 1431 ddi_put8(acchdl, &rreq->nr_segments, 0); 1432 return (XF_COMP); 1433 case WRITE_BARRIER: 1434 ddi_put8(acchdl, &rreq->operation, 1435 BLKIF_OP_WRITE_BARRIER); 1436 break; 1437 default: 1438 if (!vdp->xdf_wce) 1439 ddi_put8(acchdl, &rreq->operation, 1440 BLKIF_OP_WRITE_BARRIER); 1441 else 1442 ddi_put8(acchdl, &rreq->operation, 1443 BLKIF_OP_WRITE); 1444 break; 1445 } 1446 } 1447 1448 ddi_put16(acchdl, &rreq->handle, vdev); 1449 ddi_put64(acchdl, &rreq->sector_number, blkno); 1450 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); 1451 1452 /* 1453 * loop until all segments are populated or no more dma cookie in buf 1454 */ 1455 for (;;) { 1456 /* 1457 * Each segment of a blkif request can transfer up to 1458 * one 4K page of data. 
1459 */ 1460 bcnt = vreq->v_dmac.dmac_size; 1461 ASSERT(bcnt <= PAGESIZE); 1462 ASSERT((bcnt % XB_BSIZE) == 0); 1463 dma_addr = vreq->v_dmac.dmac_laddress; 1464 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1465 ASSERT((blk_off & XB_BMASK) == 0); 1466 fsect = blk_off >> XB_BSHIFT; 1467 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1468 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1469 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1470 DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", 1471 seg, vreq->v_dmac.dmac_size, blk_off)); 1472 gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); 1473 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1474 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1475 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1476 DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 1477 "\n", seg, fsect, lsect, gr, dma_addr)); 1478 1479 blkno += (bcnt >> XB_BSHIFT); 1480 seg++; 1481 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1482 if (--ndmacs) { 1483 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1484 continue; 1485 } 1486 1487 vreq->v_status = VREQ_DMAWIN_DONE; 1488 vreq->v_blkno = blkno; 1489 if (vreq->v_dmaw + 1 == vreq->v_ndmaws) 1490 /* last win */ 1491 rval = XF_COMP; 1492 else 1493 rval = XF_PARTIAL; 1494 break; 1495 } 1496 ddi_put8(acchdl, &rreq->nr_segments, seg); 1497 DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", 1498 rreq->id)); 1499 1500 return (rval); 1501 } 1502 1503 #define XDF_QSEC 50000 /* .005 second */ 1504 #define XDF_POLLCNT 12 /* loop for 12 times before time out */ 1505 1506 static int 1507 xdf_drain_io(xdf_t *vdp) 1508 { 1509 int pollc, rval; 1510 xendev_ring_t *xbr; 1511 1512 if (xdfdebug & SUSRES_DBG) 1513 xen_printf("xdf_drain_io: start\n"); 1514 1515 mutex_enter(&vdp->xdf_dev_lk); 1516 1517 if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) 1518 goto out; 1519 1520 rval = 0; 1521 xbr = vdp->xdf_xb_ring; 1522 ASSERT(xbr != NULL); 1523 1524 for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { 1525 if (xvdi_ring_has_unconsumed_responses(xbr)) { 1526 mutex_exit(&vdp->xdf_dev_lk); 1527 (void) xdf_intr((caddr_t)vdp); 1528 mutex_enter(&vdp->xdf_dev_lk); 1529 } 1530 if (!xvdi_ring_has_incomp_request(xbr)) 1531 goto out; 1532 1533 #ifndef XPV_HVM_DRIVER 1534 (void) HYPERVISOR_yield(); 1535 #endif /* XPV_HVM_DRIVER */ 1536 /* 1537 * file-backed devices can be slow 1538 */ 1539 drv_usecwait(XDF_QSEC << pollc); 1540 } 1541 cmn_err(CE_WARN, "xdf_polled_io: timeout"); 1542 rval = EIO; 1543 out: 1544 mutex_exit(&vdp->xdf_dev_lk); 1545 if (xdfdebug & SUSRES_DBG) 1546 xen_printf("xdf_drain_io: end, err=%d\n", rval); 1547 return (rval); 1548 } 1549 1550 /* ARGSUSED5 */ 1551 int 1552 xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, 1553 diskaddr_t start, size_t reqlen, void *tg_cookie) 1554 { 1555 xdf_t *vdp; 1556 struct buf *bp; 1557 int err = 0; 1558 1559 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1560 if (vdp == NULL) 1561 return (ENXIO); 1562 1563 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 1564 return (EINVAL); 1565 1566 bp = getrbuf(KM_SLEEP); 1567 if (cmd == TG_READ) 1568 bp->b_flags = B_BUSY | B_READ; 1569 else 1570 bp->b_flags = B_BUSY | B_WRITE; 1571 bp->b_un.b_addr = bufp; 1572 bp->b_bcount = reqlen; 1573 bp->b_blkno = start; 1574 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 1575 1576 mutex_enter(&vdp->xdf_dev_lk); 1577 if (vdp->xdf_xdev_iostat != NULL) 1578 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1579 if (vdp->xdf_f_act == NULL) { 
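		/* active queue is empty: this buf becomes both head and tail */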
1580 vdp->xdf_f_act = vdp->xdf_l_act = bp; 1581 } else { 1582 vdp->xdf_l_act->av_forw = bp; 1583 vdp->xdf_l_act = bp; 1584 } 1585 mutex_exit(&vdp->xdf_dev_lk); 1586 xdf_iostart(vdp); 1587 err = biowait(bp); 1588 1589 ASSERT(bp->b_flags & B_DONE); 1590 1591 freerbuf(bp); 1592 return (err); 1593 } 1594 1595 /* 1596 * synthetic geometry 1597 */ 1598 #define XDF_NSECTS 256 1599 #define XDF_NHEADS 16 1600 1601 static void 1602 xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1603 { 1604 xdf_t *vdp; 1605 uint_t ncyl; 1606 1607 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1608 1609 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1610 1611 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1612 geomp->g_acyl = 0; 1613 geomp->g_nhead = XDF_NHEADS; 1614 geomp->g_secsize = XB_BSIZE; 1615 geomp->g_nsect = XDF_NSECTS; 1616 geomp->g_intrlv = 0; 1617 geomp->g_rpm = 7200; 1618 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1619 } 1620 1621 static int 1622 xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) 1623 { 1624 xdf_t *vdp; 1625 1626 vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); 1627 1628 if (vdp == NULL) 1629 return (ENXIO); 1630 1631 mutex_enter(&vdp->xdf_dev_lk); 1632 *capp = vdp->xdf_pgeom.g_capacity; 1633 DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); 1634 mutex_exit(&vdp->xdf_dev_lk); 1635 return (0); 1636 } 1637 1638 static int 1639 xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1640 { 1641 xdf_t *vdp; 1642 1643 if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL) 1644 return (ENXIO); 1645 *geomp = vdp->xdf_pgeom; 1646 return (0); 1647 } 1648 1649 /* 1650 * No real HBA, no geometry available from it 1651 */ 1652 /*ARGSUSED*/ 1653 static int 1654 xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) 1655 { 1656 return (EINVAL); 1657 } 1658 1659 static int 1660 xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) 1661 { 1662 xdf_t *vdp; 1663 1664 if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) 1665 return (ENXIO); 1666 1667 if (XD_IS_RO(vdp)) 1668 tgattributep->media_is_writable = 0; 1669 else 1670 tgattributep->media_is_writable = 1; 1671 return (0); 1672 } 1673 1674 /* ARGSUSED3 */ 1675 int 1676 xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) 1677 { 1678 switch (cmd) { 1679 case TG_GETPHYGEOM: 1680 return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); 1681 case TG_GETVIRTGEOM: 1682 return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); 1683 case TG_GETCAPACITY: 1684 return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); 1685 case TG_GETBLOCKSIZE: 1686 *(uint32_t *)arg = XB_BSIZE; 1687 return (0); 1688 case TG_GETATTR: 1689 return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); 1690 default: 1691 return (ENOTTY); 1692 } 1693 } 1694 1695 /* 1696 * Kick-off connect process 1697 * Status should be XD_UNKNOWN or XD_CLOSED 1698 * On success, status will be changed to XD_INIT 1699 * On error, status won't be changed 1700 */ 1701 static int 1702 xdf_start_connect(xdf_t *vdp) 1703 { 1704 char *xsnode; 1705 grant_ref_t gref; 1706 xenbus_transaction_t xbt; 1707 int rv; 1708 dev_info_t *dip = vdp->xdf_dip; 1709 1710 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) 1711 goto errout; 1712 1713 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { 1714 cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", 1715 ddi_get_name_addr(dip)); 1716 goto errout; 1717 } 1718 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1719 #ifdef XPV_HVM_DRIVER 1720 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1721 
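	/*
	 * In HVM mode the event channel is bound directly to xdf_intr at
	 * IPL_VBD; the PV path below registers the handler via ddi_add_intr().
	 */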
#else /* !XPV_HVM_DRIVER */ 1722 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1723 DDI_SUCCESS) { 1724 cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " 1725 "failed to add intr handler", ddi_get_name_addr(dip)); 1726 goto errout1; 1727 } 1728 #endif /* !XPV_HVM_DRIVER */ 1729 1730 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1731 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1732 DDI_SUCCESS) { 1733 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1734 ddi_get_name_addr(dip)); 1735 goto errout2; 1736 } 1737 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1738 1739 /* 1740 * Write into xenstore the info needed by backend 1741 */ 1742 if ((xsnode = xvdi_get_xsname(dip)) == NULL) { 1743 cmn_err(CE_WARN, "xdf@%s: " 1744 "failed to get xenstore node path", 1745 ddi_get_name_addr(dip)); 1746 goto fail_trans; 1747 } 1748 trans_retry: 1749 if (xenbus_transaction_start(&xbt)) { 1750 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1751 ddi_get_name_addr(dip)); 1752 xvdi_fatal_error(dip, EIO, "transaction start"); 1753 goto fail_trans; 1754 } 1755 1756 if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { 1757 cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", 1758 ddi_get_name_addr(dip)); 1759 xvdi_fatal_error(dip, rv, "writing ring-ref"); 1760 goto abort_trans; 1761 } 1762 1763 if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", 1764 vdp->xdf_evtchn)) { 1765 cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", 1766 ddi_get_name_addr(dip)); 1767 xvdi_fatal_error(dip, rv, "writing event-channel"); 1768 goto abort_trans; 1769 } 1770 1771 /* 1772 * "protocol" is written by the domain builder in the case of PV 1773 * domains. However, it is not written for HVM domains, so let's 1774 * write it here. 
1775 */ 1776 if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", 1777 XEN_IO_PROTO_ABI_NATIVE)) { 1778 cmn_err(CE_WARN, "xdf@%s: failed to write protocol", 1779 ddi_get_name_addr(dip)); 1780 xvdi_fatal_error(dip, rv, "writing protocol"); 1781 goto abort_trans; 1782 } 1783 1784 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1785 cmn_err(CE_WARN, "xdf@%s: " 1786 "failed to switch state to XenbusStateInitialised", 1787 ddi_get_name_addr(dip)); 1788 xvdi_fatal_error(dip, rv, "writing state"); 1789 goto abort_trans; 1790 } 1791 1792 /* kick-off connect process */ 1793 if (rv = xenbus_transaction_end(xbt, 0)) { 1794 if (rv == EAGAIN) 1795 goto trans_retry; 1796 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1797 ddi_get_name_addr(dip)); 1798 xvdi_fatal_error(dip, rv, "completing transaction"); 1799 goto fail_trans; 1800 } 1801 1802 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1803 mutex_enter(&vdp->xdf_dev_lk); 1804 vdp->xdf_status = XD_INIT; 1805 mutex_exit(&vdp->xdf_dev_lk); 1806 1807 return (DDI_SUCCESS); 1808 1809 abort_trans: 1810 (void) xenbus_transaction_end(xbt, 1); 1811 fail_trans: 1812 xvdi_free_ring(vdp->xdf_xb_ring); 1813 errout2: 1814 #ifdef XPV_HVM_DRIVER 1815 ec_unbind_evtchn(vdp->xdf_evtchn); 1816 #else /* !XPV_HVM_DRIVER */ 1817 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1818 #endif /* !XPV_HVM_DRIVER */ 1819 errout1: 1820 xvdi_free_evtchn(dip); 1821 errout: 1822 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1823 ddi_get_name_addr(dip)); 1824 return (DDI_FAILURE); 1825 } 1826 1827 /* 1828 * Kick-off disconnect process 1829 * Status won't be changed 1830 */ 1831 static int 1832 xdf_start_disconnect(xdf_t *vdp) 1833 { 1834 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1835 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1836 ddi_get_name_addr(vdp->xdf_dip)); 1837 return (DDI_FAILURE); 1838 } 1839 1840 return (DDI_SUCCESS); 1841 } 1842 1843 int 1844 xdf_get_flush_block(xdf_t *vdp) 1845 { 1846 /* 1847 * Get a DEV_BSIZE aligned bufer 1848 */ 1849 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1850 vdp->xdf_cache_flush_block = 1851 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1852 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1853 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1854 return (DDI_FAILURE); 1855 return (DDI_SUCCESS); 1856 } 1857 1858 /* 1859 * Finish other initialization after we've connected to backend 1860 * Status should be XD_INIT before calling this routine 1861 * On success, status should be changed to XD_READY 1862 * On error, status should stay XD_INIT 1863 */ 1864 static int 1865 xdf_post_connect(xdf_t *vdp) 1866 { 1867 int rv; 1868 uint_t len; 1869 char *type; 1870 char *barrier; 1871 dev_info_t *devi = vdp->xdf_dip; 1872 1873 /* 1874 * Determine if feature barrier is supported by backend 1875 */ 1876 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1877 "feature-barrier", (void **)&barrier, &len) == 0) { 1878 vdp->xdf_feature_barrier = 1; 1879 kmem_free(barrier, len); 1880 } else { 1881 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1882 ddi_get_name_addr(vdp->xdf_dip)); 1883 vdp->xdf_feature_barrier = 0; 1884 } 1885 1886 /* probe backend */ 1887 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1888 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1889 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1890 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1891 "cannot read backend info", ddi_get_name_addr(devi)); 1892 
xvdi_fatal_error(devi, rv, "reading backend info"); 1893 return (DDI_FAILURE); 1894 } 1895 1896 /* 1897 * Make sure that the device we're connecting isn't smaller than 1898 * the old connected device. 1899 */ 1900 if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) { 1901 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1902 "backend disk device shrank", ddi_get_name_addr(devi)); 1903 /* XXX: call xvdi_fatal_error() here? */ 1904 xvdi_fatal_error(devi, rv, "reading backend info"); 1905 return (DDI_FAILURE); 1906 } 1907 1908 /* 1909 * Only update the physical geometry to reflect the new device 1910 * size if this is the first time we're connecting to the backend 1911 * device. Once we assign a physical geometry to a device it stays 1912 * fixed until: 1913 * - we get detach and re-attached (at which point we 1914 * automatically assign a new physical geometry). 1915 * - someone calls TG_SETPHYGEOM to explicity set the 1916 * physical geometry. 1917 */ 1918 if (vdp->xdf_pgeom.g_capacity == 0) 1919 xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); 1920 1921 /* fix disk type */ 1922 if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", 1923 (void **)&type, &len) != 0) { 1924 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1925 "cannot read device-type", ddi_get_name_addr(devi)); 1926 xvdi_fatal_error(devi, rv, "reading device-type"); 1927 return (DDI_FAILURE); 1928 } 1929 if (strcmp(type, "cdrom") == 0) 1930 vdp->xdf_xdev_info |= VDISK_CDROM; 1931 kmem_free(type, len); 1932 1933 /* 1934 * We've created all the minor nodes via cmlb_attach() using default 1935 * value in xdf_attach() to make it possible to block in xdf_open(), 1936 * in case there's anyone (say, booting thread) ever trying to open 1937 * it before connected to backend. We will refresh all those minor 1938 * nodes w/ latest info we've got now when we are almost connected. 1939 * 1940 * Don't do this when xdf is already opened by someone (could happen 1941 * during resume), for that cmlb_attach() will invalid the label info 1942 * and confuse those who has already opened the node, which is bad. 1943 */ 1944 if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { 1945 /* re-init cmlb w/ latest info we got from backend */ 1946 if (cmlb_attach(devi, &xdf_lb_ops, 1947 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 1948 XD_IS_RM(vdp), 1, DDI_NT_BLOCK, 1949 #if defined(XPV_HVM_DRIVER) 1950 CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | 1951 CMLB_INTERNAL_MINOR_NODES, 1952 #else /* !XPV_HVM_DRIVER */ 1953 CMLB_FAKE_LABEL_ONE_PARTITION, 1954 #endif /* !XPV_HVM_DRIVER */ 1955 vdp->xdf_vd_lbl, NULL) != 0) { 1956 cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", 1957 ddi_get_name_addr(devi)); 1958 return (DDI_FAILURE); 1959 } 1960 } 1961 1962 /* mark vbd is ready for I/O */ 1963 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1964 mutex_enter(&vdp->xdf_dev_lk); 1965 vdp->xdf_status = XD_READY; 1966 mutex_exit(&vdp->xdf_dev_lk); 1967 /* 1968 * If backend has feature-barrier, see if it supports disk 1969 * cache flush op. 1970 */ 1971 vdp->xdf_flush_supported = 0; 1972 if (vdp->xdf_feature_barrier) { 1973 /* 1974 * Pretend we already know flush is supported so probe 1975 * will attempt the correct op. 1976 */ 1977 vdp->xdf_flush_supported = 1; 1978 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1979 vdp->xdf_flush_supported = 1; 1980 } else { 1981 vdp->xdf_flush_supported = 0; 1982 /* 1983 * If the other end does not support the cache flush op 1984 * then we must use a barrier-write to force disk 1985 * cache flushing. 
Barrier writes require that a data 1986 * block actually be written. 1987 * Cache a block to barrier-write when we are 1988 * asked to perform a flush. 1989 * XXX - would it be better to just copy 1 block 1990 * (512 bytes) from whatever write we did last 1991 * and rewrite that block? 1992 */ 1993 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1994 return (DDI_FAILURE); 1995 } 1996 } 1997 1998 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 1999 (uint64_t)vdp->xdf_xdev_nblocks); 2000 2001 return (DDI_SUCCESS); 2002 } 2003 2004 /* 2005 * Finish other uninitialization after we've disconnected from backend 2006 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 2007 */ 2008 static void 2009 xdf_post_disconnect(xdf_t *vdp) 2010 { 2011 #ifdef XPV_HVM_DRIVER 2012 ec_unbind_evtchn(vdp->xdf_evtchn); 2013 #else /* !XPV_HVM_DRIVER */ 2014 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 2015 #endif /* !XPV_HVM_DRIVER */ 2016 xvdi_free_evtchn(vdp->xdf_dip); 2017 xvdi_free_ring(vdp->xdf_xb_ring); 2018 vdp->xdf_xb_ring = NULL; 2019 vdp->xdf_xb_ring_hdl = NULL; 2020 vdp->xdf_peer = (domid_t)-1; 2021 2022 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 2023 mutex_enter(&vdp->xdf_dev_lk); 2024 vdp->xdf_status = XD_CLOSED; 2025 mutex_exit(&vdp->xdf_dev_lk); 2026 } 2027 2028 /*ARGSUSED*/ 2029 static void 2030 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 2031 { 2032 XenbusState new_state = *(XenbusState *)impl_data; 2033 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2034 boolean_t unexpect_die = B_FALSE; 2035 int status; 2036 2037 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 2038 ddi_get_name_addr(dip), new_state)); 2039 2040 mutex_enter(&vdp->xdf_cb_lk); 2041 2042 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 2043 mutex_exit(&vdp->xdf_cb_lk); 2044 return; 2045 } 2046 2047 switch (new_state) { 2048 case XenbusStateInitialising: 2049 ASSERT(vdp->xdf_status == XD_CLOSED); 2050 /* 2051 * backend recovered from a previous failure, 2052 * kick-off connect process again 2053 */ 2054 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 2055 cmn_err(CE_WARN, "xdf@%s:" 2056 " failed to start reconnecting to backend", 2057 ddi_get_name_addr(dip)); 2058 } 2059 break; 2060 case XenbusStateConnected: 2061 ASSERT(vdp->xdf_status == XD_INIT); 2062 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 2063 /* finish final init after connect */ 2064 if (xdf_post_connect(vdp) != DDI_SUCCESS) 2065 (void) xdf_start_disconnect(vdp); 2066 break; 2067 case XenbusStateClosing: 2068 if (vdp->xdf_status == XD_READY) { 2069 mutex_enter(&vdp->xdf_dev_lk); 2070 if (xdf_isopen(vdp, -1)) { 2071 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 2072 "still in use", ddi_get_name_addr(dip)); 2073 mutex_exit(&vdp->xdf_dev_lk); 2074 break; 2075 } else { 2076 vdp->xdf_status = XD_CLOSING; 2077 } 2078 mutex_exit(&vdp->xdf_dev_lk); 2079 } 2080 (void) xdf_start_disconnect(vdp); 2081 break; 2082 case XenbusStateClosed: 2083 /* first check if BE closed unexpectedly */ 2084 mutex_enter(&vdp->xdf_dev_lk); 2085 if (xdf_isopen(vdp, -1)) { 2086 unexpect_die = B_TRUE; 2087 unexpectedie(vdp); 2088 cmn_err(CE_WARN, "xdf@%s: backend closed, " 2089 "reconnecting...", ddi_get_name_addr(dip)); 2090 } 2091 mutex_exit(&vdp->xdf_dev_lk); 2092 2093 if (vdp->xdf_status == XD_READY) { 2094 mutex_enter(&vdp->xdf_dev_lk); 2095 vdp->xdf_status = XD_CLOSING; 2096 mutex_exit(&vdp->xdf_dev_lk); 2097 2098 #ifdef DOMU_BACKEND 2099 (void) xvdi_post_event(dip, 
/*ARGSUSED*/
static void
xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
    XenbusState new_state = *(XenbusState *)impl_data;
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    boolean_t unexpect_die = B_FALSE;
    int status;

    DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
        ddi_get_name_addr(dip), new_state));

    mutex_enter(&vdp->xdf_cb_lk);

    if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) {
        mutex_exit(&vdp->xdf_cb_lk);
        return;
    }

    switch (new_state) {
    case XenbusStateInitialising:
        ASSERT(vdp->xdf_status == XD_CLOSED);
        /*
         * The backend recovered from a previous failure;
         * kick off the connect process again.
         */
        if (xdf_start_connect(vdp) != DDI_SUCCESS) {
            cmn_err(CE_WARN, "xdf@%s:"
                " failed to start reconnecting to backend",
                ddi_get_name_addr(dip));
        }
        break;
    case XenbusStateConnected:
        ASSERT(vdp->xdf_status == XD_INIT);
        (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
        /* finish final init after connect */
        if (xdf_post_connect(vdp) != DDI_SUCCESS)
            (void) xdf_start_disconnect(vdp);
        break;
    case XenbusStateClosing:
        if (vdp->xdf_status == XD_READY) {
            mutex_enter(&vdp->xdf_dev_lk);
            if (xdf_isopen(vdp, -1)) {
                cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, "
                    "still in use", ddi_get_name_addr(dip));
                mutex_exit(&vdp->xdf_dev_lk);
                break;
            } else {
                vdp->xdf_status = XD_CLOSING;
            }
            mutex_exit(&vdp->xdf_dev_lk);
        }
        (void) xdf_start_disconnect(vdp);
        break;
    case XenbusStateClosed:
        /* first check if the backend closed unexpectedly */
        mutex_enter(&vdp->xdf_dev_lk);
        if (xdf_isopen(vdp, -1)) {
            unexpect_die = B_TRUE;
            unexpectedie(vdp);
            cmn_err(CE_WARN, "xdf@%s: backend closed, "
                "reconnecting...", ddi_get_name_addr(dip));
        }
        mutex_exit(&vdp->xdf_dev_lk);

        if (vdp->xdf_status == XD_READY) {
            mutex_enter(&vdp->xdf_dev_lk);
            vdp->xdf_status = XD_CLOSING;
            mutex_exit(&vdp->xdf_dev_lk);

#ifdef DOMU_BACKEND
            (void) xvdi_post_event(dip, XEN_HP_REMOVE);
#endif

            xdf_post_disconnect(vdp);
            (void) xvdi_switch_state(dip, XBT_NULL,
                XenbusStateClosed);
        } else if ((vdp->xdf_status == XD_INIT) ||
            (vdp->xdf_status == XD_CLOSING)) {
            xdf_post_disconnect(vdp);
        } else {
            mutex_enter(&vdp->xdf_dev_lk);
            vdp->xdf_status = XD_CLOSED;
            mutex_exit(&vdp->xdf_dev_lk);
        }
    }

    /* notify anybody waiting for an otherend state change */
    mutex_enter(&vdp->xdf_dev_lk);
    cv_broadcast(&vdp->xdf_dev_cv);
    mutex_exit(&vdp->xdf_dev_lk);

    status = vdp->xdf_status;
    mutex_exit(&vdp->xdf_cb_lk);

    if (status == XD_READY) {
        xdf_iostart(vdp);
    } else if ((status == XD_CLOSED) && !unexpect_die) {
        /* interface closed successfully, remove all minor nodes */
        cmlb_detach(vdp->xdf_vd_lbl, NULL);
        cmlb_free_handle(&vdp->xdf_vd_lbl);
    }
}

/*
 * Check if a partition is open; partition == -1 means check all
 * partitions on the disk.
 */
static boolean_t
xdf_isopen(xdf_t *vdp, int partition)
{
    int i;
    ulong_t parbit;
    boolean_t rval = B_FALSE;

    ASSERT((partition == -1) ||
        ((partition >= 0) && (partition < XDF_PEXT)));

    if (partition == -1)
        parbit = (ulong_t)-1;
    else
        parbit = 1 << partition;

    for (i = 0; i < OTYPCNT; i++) {
        if (vdp->xdf_vd_open[i] & parbit)
            rval = B_TRUE;
    }

    return (rval);
}
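
/*
 * Example of the bitmask test in xdf_isopen() above (illustrative only):
 * xdf_vd_open[] keeps one bitmask of open partitions per open type, so
 * checking whether slice 2 is open by any open type is simply
 *
 *	ulong_t parbit = 1 << 2;
 *	for (i = 0; i < OTYPCNT; i++)
 *		if (vdp->xdf_vd_open[i] & parbit)
 *			... slice 2 is open ...
 *
 * and passing partition == -1 sets every bit, so the loop reports
 * whether any partition is open.
 */
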
/*
 * xdf_check_state_transition() checks whether the XenbusState change is
 * a valid transition.  The new state is written by the backend domain,
 * or by running xenstore-write to change it manually in dom0.
 */
static int
xdf_check_state_transition(xdf_t *vdp, XenbusState oestate)
{
    int status;
    int stcheck;
#define STOK    0   /* needs further processing */
#define STNOP   1   /* no action needs to be taken */
#define STBUG   2   /* unexpected state change, could be a bug */

    status = vdp->xdf_status;
    stcheck = STOK;

    switch (status) {
    case XD_UNKNOWN:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateConnected))
            stcheck = STBUG;
        else if ((oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised))
            stcheck = STNOP;
        break;
    case XD_INIT:
        if (oestate == XenbusStateUnknown)
            stcheck = STBUG;
        else if ((oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised))
            stcheck = STNOP;
        break;
    case XD_READY:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised))
            stcheck = STBUG;
        else if (oestate == XenbusStateConnected)
            stcheck = STNOP;
        break;
    case XD_CLOSING:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateInitialising) ||
            (oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised) ||
            (oestate == XenbusStateConnected))
            stcheck = STBUG;
        else if (oestate == XenbusStateClosing)
            stcheck = STNOP;
        break;
    case XD_CLOSED:
        if ((oestate == XenbusStateUnknown) ||
            (oestate == XenbusStateConnected))
            stcheck = STBUG;
        else if ((oestate == XenbusStateInitWait) ||
            (oestate == XenbusStateInitialised) ||
            (oestate == XenbusStateClosing) ||
            (oestate == XenbusStateClosed))
            stcheck = STNOP;
        break;
    case XD_SUSPEND:
    default:
        stcheck = STBUG;
    }

    if (stcheck == STOK)
        return (DDI_SUCCESS);

    if (stcheck == STBUG)
        cmn_err(CE_NOTE, "xdf@%s: unexpected otherend "
            "state change to %d when status is %d",
            ddi_get_name_addr(vdp->xdf_dip), oestate, status);

    return (DDI_FAILURE);
}

static int
xdf_connect(xdf_t *vdp, boolean_t wait)
{
    ASSERT(mutex_owned(&vdp->xdf_dev_lk));
    while (vdp->xdf_status != XD_READY) {
        if (!wait || (vdp->xdf_status > XD_READY))
            break;

        if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
            break;
    }

    return (vdp->xdf_status);
}

/*
 * Callback invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
    xdf_t *vdp = (xdf_t *)arg;
    ASSERT(vdp != NULL);

    DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
        ddi_get_name_addr(vdp->xdf_dip)));

    ddi_trigger_softintr(vdp->xdf_softintr_id);
    return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
    xdf_t *vdp = (xdf_t *)arg;

    ASSERT(vdp != NULL);

    mutex_enter(&vdp->xdf_dev_lk);
    ASSERT(ISDMACBON(vdp));
    SETDMACBOFF(vdp);
    mutex_exit(&vdp->xdf_dev_lk);

    xdf_iostart(vdp);

    return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
    xdf_t *vdp = arg;

    mutex_enter(&vdp->xdf_dev_lk);
    vdp->xdf_timeout_id = 0;
    mutex_exit(&vdp->xdf_dev_lk);

    /* a new timeout could be re-scheduled from here on */
    xdf_iostart(vdp);
}

/*
 * Allocate a vreq for this bp.
 * bp->av_back contains the pointer to the vreq upon return.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = NULL;

    ASSERT(BP2VREQ(bp) == NULL);

    vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
    if (vreq == NULL) {
        if (vdp->xdf_timeout_id == 0)
            /* restart I/O after one second */
            vdp->xdf_timeout_id =
                timeout(xdf_timeout_handler, vdp, hz);
        return (NULL);
    }
    bzero(vreq, sizeof (v_req_t));

    list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
    bp->av_back = (buf_t *)vreq;
    vreq->v_buf = bp;
    vreq->v_status = VREQ_INIT;
    /* initialization of other fields in the vreq is up to the caller */

    return (vreq);
}
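
/*
 * Note on the KM_NOSLEEP + timeout() pattern used by vreq_get() above
 * (and again by gs_get() below): allocation failures never block the
 * caller.  Instead a one second timeout is armed, xdf_timeout_handler()
 * fires, and xdf_iostart() retries the queued bufs.  A minimal sketch of
 * a (hypothetical) caller:
 *
 *	if ((vreq = vreq_get(vdp, bp)) == NULL)
 *		return;		// bp stays queued; it is retried from
 *				// xdf_timeout_handler()/xdf_iostart()
 */
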
static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
    buf_t *bp = vreq->v_buf;

    list_remove(&vdp->xdf_vreq_act, (void *)vreq);

    if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
        goto done;

    switch (vreq->v_status) {
    case VREQ_DMAWIN_DONE:
    case VREQ_GS_ALLOCED:
    case VREQ_DMABUF_BOUND:
        (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
        /*FALLTHRU*/
    case VREQ_DMAMEM_ALLOCED:
        if (!ALIGNED_XFER(bp)) {
            ASSERT(vreq->v_abuf != NULL);
            if (!IS_ERROR(bp) && IS_READ(bp))
                bcopy(vreq->v_abuf, bp->b_un.b_addr,
                    bp->b_bcount);
            ddi_dma_mem_free(&vreq->v_align);
        }
        /*FALLTHRU*/
    case VREQ_MEMDMAHDL_ALLOCED:
        if (!ALIGNED_XFER(bp))
            ddi_dma_free_handle(&vreq->v_memdmahdl);
        /*FALLTHRU*/
    case VREQ_DMAHDL_ALLOCED:
        ddi_dma_free_handle(&vreq->v_dmahdl);
        break;
    default:
        break;
    }
done:
    vreq->v_buf->av_back = NULL;
    kmem_cache_free(xdf_vreq_cache, vreq);
}

/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
    int rc;
    ddi_dma_attr_t dmaattr;
    uint_t ndcs, ndws;
    ddi_dma_handle_t dh;
    ddi_dma_handle_t mdh;
    ddi_dma_cookie_t dc;
    ddi_acc_handle_t abh;
    caddr_t aba;
    ge_slot_t *gs;
    size_t bufsz;
    off_t off;
    size_t sz;
    buf_t *bp = vreq->v_buf;
    int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
        DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

    switch (vreq->v_status) {
    case VREQ_INIT:
        if (IS_FLUSH_DISKCACHE(bp)) {
            if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
                DPRINTF(DMA_DBG, (
                    "xdf@%s: get ge_slot failed\n",
                    ddi_get_name_addr(vdp->xdf_dip)));
                return (DDI_FAILURE);
            }
            vreq->v_blkno = 0;
            vreq->v_nslots = 1;
            vreq->v_gs = gs;
            vreq->v_flush_diskcache = FLUSH_DISKCACHE;
            vreq->v_status = VREQ_GS_ALLOCED;
            gs->vreq = vreq;
            return (DDI_SUCCESS);
        }

        if (IS_WRITE_BARRIER(vdp, bp))
            vreq->v_flush_diskcache = WRITE_BARRIER;
        vreq->v_blkno = bp->b_blkno +
            (diskaddr_t)(uintptr_t)bp->b_private;
        bp->b_private = NULL;
        /* see if we wrote new data to our flush block */
        if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
            check_fbwrite(vdp, bp, vreq->v_blkno);
        vreq->v_status = VREQ_INIT_DONE;
        /*FALLTHRU*/

    case VREQ_INIT_DONE:
        /*
         * alloc DMA handle
         */
        rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
            xdf_dmacallback, (caddr_t)vdp, &dh);
        if (rc != DDI_SUCCESS) {
            SETDMACBON(vdp);
            DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_dmahdl = dh;
        vreq->v_status = VREQ_DMAHDL_ALLOCED;
        /*FALLTHRU*/

    case VREQ_DMAHDL_ALLOCED:
        /*
         * alloc DMA handle for 512-byte aligned buf
         */
        if (!ALIGNED_XFER(bp)) {
            /*
             * XXPV: we need to temporarily enlarge the seg
             * boundary and s/g length to work around CR6381968
             */
            dmaattr = xb_dma_attr;
            dmaattr.dma_attr_seg = (uint64_t)-1;
            dmaattr.dma_attr_sgllen = INT_MAX;
            rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
                xdf_dmacallback, (caddr_t)vdp, &mdh);
            if (rc != DDI_SUCCESS) {
                SETDMACBON(vdp);
                DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
                    "handle alloc failed\n",
                    ddi_get_name_addr(vdp->xdf_dip)));
                return (DDI_FAILURE);
            }
            vreq->v_memdmahdl = mdh;
            vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
        }
        /*FALLTHRU*/

    case VREQ_MEMDMAHDL_ALLOCED:
        /*
         * alloc 512-byte aligned buf
         */
        if (!ALIGNED_XFER(bp)) {
            if (bp->b_flags & (B_PAGEIO | B_PHYS))
                bp_mapin(bp);

            rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
                roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
                DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
                &aba, &bufsz, &abh);
            if (rc != DDI_SUCCESS) {
                SETDMACBON(vdp);
                DPRINTF(DMA_DBG, (
                    "xdf@%s: DMA mem allocation failed\n",
                    ddi_get_name_addr(vdp->xdf_dip)));
                return (DDI_FAILURE);
            }

            vreq->v_abuf = aba;
            vreq->v_align = abh;
            vreq->v_status = VREQ_DMAMEM_ALLOCED;

            ASSERT(bufsz >= bp->b_bcount);
            if (!IS_READ(bp))
                bcopy(bp->b_un.b_addr, vreq->v_abuf,
                    bp->b_bcount);
        }
        /*FALLTHRU*/

    case VREQ_DMAMEM_ALLOCED:
        /*
         * dma bind
         */
        if (ALIGNED_XFER(bp)) {
            rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
                dma_flags, xdf_dmacallback, (caddr_t)vdp,
                &dc, &ndcs);
        } else {
            rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
                NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
                xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
        }
        if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
            /* get the number of DMA windows */
            if (rc == DDI_DMA_PARTIAL_MAP) {
                rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
                ASSERT(rc == DDI_SUCCESS);
            } else {
                ndws = 1;
            }
        } else {
            SETDMACBON(vdp);
            DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_dmac = dc;
        vreq->v_dmaw = 0;
        vreq->v_ndmacs = ndcs;
        vreq->v_ndmaws = ndws;
        vreq->v_nslots = ndws;
        vreq->v_status = VREQ_DMABUF_BOUND;
        /*FALLTHRU*/

    case VREQ_DMABUF_BOUND:
        /*
         * get a ge_slot; the callback is set upon failure from gs_get(),
         * if not set previously
         */
        if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
            DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_gs = gs;
        gs->vreq = vreq;
        vreq->v_status = VREQ_GS_ALLOCED;
        break;

    case VREQ_GS_ALLOCED:
        /* nothing needs to be done */
        break;

    case VREQ_DMAWIN_DONE:
        /*
         * move to the next DMA window
         */
        ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

        /* get a ge_slot for this DMA window */
        if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
            DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
                ddi_get_name_addr(vdp->xdf_dip)));
            return (DDI_FAILURE);
        }

        vreq->v_gs = gs;
        gs->vreq = vreq;
        vreq->v_dmaw++;
        rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
            &vreq->v_dmac, &vreq->v_ndmacs);
        ASSERT(rc == DDI_SUCCESS);
        vreq->v_status = VREQ_GS_ALLOCED;
        break;

    default:
        return (DDI_FAILURE);
    }

    return (DDI_SUCCESS);
}
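
/*
 * Relationship set up by vreq_setup() above: each DMA window of a bound
 * buf consumes one ge_slot_t, and each DMA cookie within that window
 * consumes one grant reference from the slot (see gs_grant() below).
 * A minimal sketch of how a request's segments are granted (comment
 * only; "mfn" stands for the cookie's machine frame number):
 *
 *	for each cookie in the current window (v_ndmacs of them):
 *		grant_ref_t gr = gs_grant(vreq->v_gs, mfn);
 *		// gr is placed into the blkif segment descriptor
 */
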
static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
    grant_ref_t gh;
    ge_slot_t *gs;

    /* try to alloc GTEs needed in this slot, first */
    if (gnttab_alloc_grant_references(
        BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
        if (vdp->xdf_gnt_callback.next == NULL) {
            SETDMACBON(vdp);
            gnttab_request_free_callback(
                &vdp->xdf_gnt_callback,
                (void (*)(void *))xdf_dmacallback,
                (void *)vdp,
                BLKIF_MAX_SEGMENTS_PER_REQUEST);
        }
        return (NULL);
    }

    gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
    if (gs == NULL) {
        gnttab_free_grant_references(gh);
        if (vdp->xdf_timeout_id == 0)
            /* restart I/O after one second */
            vdp->xdf_timeout_id =
                timeout(xdf_timeout_handler, vdp, hz);
        return (NULL);
    }

    /* init gs_slot */
    list_insert_head(&vdp->xdf_gs_act, (void *)gs);
    gs->oeid = vdp->xdf_peer;
    gs->isread = isread;
    gs->ghead = gh;
    gs->ngrefs = 0;

    return (gs);
}
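
/*
 * gs_get() above reserves BLKIF_MAX_SEGMENTS_PER_REQUEST grant
 * references up front, so a request can never fail halfway through
 * granting its segments.  When the grant table is exhausted it
 * registers xdf_dmacallback() via gnttab_request_free_callback();
 * because only one 'struct gnttab_free_callback' lives in the xdf_t,
 * the registration is guarded by the xdf_gnt_callback.next check.
 */
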
static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
    int i;
    grant_ref_t *gp = gs->ge;
    int ngrefs = gs->ngrefs;
    boolean_t isread = gs->isread;

    list_remove(&vdp->xdf_gs_act, (void *)gs);

    /* release all grant table entry resources used in this slot */
    for (i = 0; i < ngrefs; i++, gp++)
        gnttab_end_foreign_access(*gp, !isread, 0);
    gnttab_free_grant_references(gs->ghead);

    kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
    grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

    ASSERT(gr != -1);
    ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
    gs->ge[gs->ngrefs++] = gr;
    gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

    return (gr);
}

static void
unexpectedie(xdf_t *vdp)
{
    /* clean up I/Os in the ring that have responses */
    if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
        mutex_exit(&vdp->xdf_dev_lk);
        (void) xdf_intr((caddr_t)vdp);
        mutex_enter(&vdp->xdf_dev_lk);
    }

    /* free up all grant table entries */
    while (!list_is_empty(&vdp->xdf_gs_act))
        gs_free(vdp, list_head(&vdp->xdf_gs_act));

    /*
     * Move the bufs back to the active list in order;
     * vreq_busy is updated in vreq_free().
     */
    while (!list_is_empty(&vdp->xdf_vreq_act)) {
        v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
        buf_t *bp = vreq->v_buf;

        bp->av_back = NULL;
        bp->b_resid = bp->b_bcount;
        if (vdp->xdf_f_act == NULL) {
            vdp->xdf_f_act = vdp->xdf_l_act = bp;
        } else {
            /* move to the head of the list */
            bp->av_forw = vdp->xdf_f_act;
            vdp->xdf_f_act = bp;
        }
        if (vdp->xdf_xdev_iostat != NULL)
            kstat_runq_back_to_waitq(
                KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
        vreq_free(vdp, vreq);
    }
}

static void
xdfmin(struct buf *bp)
{
    if (bp->b_bcount > xdf_maxphys)
        bp->b_bcount = xdf_maxphys;
}

void
xdf_kstat_delete(dev_info_t *dip)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    kstat_t *kstat;

    /*
     * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
     * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
     * and the contents of our kstat.  xdf_iostat_lk is used
     * to protect the allocation and freeing of the actual kstat.
     * xdf_dev_lk can't be used for this purpose because kstat
     * readers use it to access the contents of the kstat and
     * hence it can't be held when calling kstat_delete().
     */
    mutex_enter(&vdp->xdf_iostat_lk);
    mutex_enter(&vdp->xdf_dev_lk);

    if (vdp->xdf_xdev_iostat == NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return;
    }

    kstat = vdp->xdf_xdev_iostat;
    vdp->xdf_xdev_iostat = NULL;
    mutex_exit(&vdp->xdf_dev_lk);

    kstat_delete(kstat);
    mutex_exit(&vdp->xdf_iostat_lk);
}
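
/*
 * Lock ordering used by the kstat routines (see the comment in
 * xdf_kstat_delete() above): always xdf_iostat_lk first, then
 * xdf_dev_lk.  For contrast, a sketch of a kstat reader (illustrative
 * only), which holds only xdf_dev_lk because it is installed as the
 * kstat's ks_lock in xdf_kstat_create() below:
 *
 *	mutex_enter(&vdp->xdf_dev_lk);
 *	... read KSTAT_IO_PTR(vdp->xdf_xdev_iostat) ...
 *	mutex_exit(&vdp->xdf_dev_lk);
 *
 * This is exactly why xdf_dev_lk may not be held across kstat_delete().
 */
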
int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

    /* see the comment about locking in xdf_kstat_delete() */
    mutex_enter(&vdp->xdf_iostat_lk);
    mutex_enter(&vdp->xdf_dev_lk);

    if (vdp->xdf_xdev_iostat != NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return (-1);
    }

    if ((vdp->xdf_xdev_iostat = kstat_create(
        ks_module, ks_instance, NULL, "disk",
        KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return (-1);
    }

    vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
    kstat_install(vdp->xdf_xdev_iostat);
    mutex_exit(&vdp->xdf_dev_lk);
    mutex_exit(&vdp->xdf_iostat_lk);

    return (0);
}

#if defined(XPV_HVM_DRIVER)

typedef struct xdf_hvm_entry {
    list_node_t     xdf_he_list;
    char            *xdf_he_path;
    dev_info_t      *xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;

static xdf_hvm_entry_t *
i_xdf_hvm_find(char *path, dev_info_t *dip)
{
    xdf_hvm_entry_t *i;

    ASSERT((path != NULL) || (dip != NULL));
    ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

    i = list_head(&xdf_hvm_list);
    while (i != NULL) {
        if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
            i = list_next(&xdf_hvm_list, i);
            continue;
        }
        if ((dip != NULL) && (i->xdf_he_dip != dip)) {
            i = list_next(&xdf_hvm_list, i);
            continue;
        }
        break;
    }
    return (i);
}

dev_info_t *
xdf_hvm_hold(char *path)
{
    xdf_hvm_entry_t *i;
    dev_info_t *dip;

    mutex_enter(&xdf_hvm_list_lock);
    i = i_xdf_hvm_find(path, NULL);
    if (i == NULL) {
        mutex_exit(&xdf_hvm_list_lock);
        return (NULL);
    }
    ndi_hold_devi(dip = i->xdf_he_dip);
    mutex_exit(&xdf_hvm_list_lock);
    return (dip);
}

static void
xdf_hvm_add(dev_info_t *dip)
{
    xdf_hvm_entry_t *i;
    char *path;

    /* figure out the path for the dip */
    path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
    (void) ddi_pathname(dip, path);

    i = kmem_alloc(sizeof (*i), KM_SLEEP);
    i->xdf_he_dip = dip;
    i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

    mutex_enter(&xdf_hvm_list_lock);
    ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
    ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
    list_insert_head(&xdf_hvm_list, i);
    mutex_exit(&xdf_hvm_list_lock);

    kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
    xdf_hvm_entry_t *i;

    mutex_enter(&xdf_hvm_list_lock);
    VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
    list_remove(&xdf_hvm_list, i);
    mutex_exit(&xdf_hvm_list_lock);

    kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
    kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
    list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
        offsetof(xdf_hvm_entry_t, xdf_he_list));
    mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
    ASSERT(list_head(&xdf_hvm_list) == NULL);
    list_destroy(&xdf_hvm_list);
    mutex_destroy(&xdf_hvm_list_lock);
}
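
/*
 * The xdf_hvm_list above maps device paths to dips so that a caller can
 * locate and hold the corresponding xdf instance.  A typical lookup
 * sequence (illustrative; the path below is made up):
 *
 *	dev_info_t *dip = xdf_hvm_hold("/xpvd/xdf@768");
 *	if (dip != NULL) {
 *		... use the held dip ...
 *		ndi_rele_devi(dip);	// drop the ndi_hold_devi() hold
 *	}
 *
 * Entries are recorded by xdf_hvm_add() at attach time and removed by
 * xdf_hvm_rm().
 */
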
int
xdf_hvm_connect(dev_info_t *dip)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    int rv;

    /* do cv_wait until connected or failed */
    mutex_enter(&vdp->xdf_dev_lk);
    rv = xdf_connect(vdp, B_TRUE);
    mutex_exit(&vdp->xdf_dev_lk);
    return ((rv == XD_READY) ? 0 : -1);
}

int
xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

    /* sanity check the requested physical geometry */
    mutex_enter(&vdp->xdf_dev_lk);
    if ((geomp->g_secsize != XB_BSIZE) ||
        (geomp->g_capacity == 0)) {
        mutex_exit(&vdp->xdf_dev_lk);
        return (EINVAL);
    }

    /*
     * If we've already connected to the backend device then make sure
     * we're not defining a physical geometry larger than our backend
     * device.
     */
    if ((vdp->xdf_xdev_nblocks != 0) &&
        (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
        mutex_exit(&vdp->xdf_dev_lk);
        return (EINVAL);
    }

    vdp->xdf_pgeom = *geomp;
    mutex_exit(&vdp->xdf_dev_lk);

    /* force a re-validation */
    cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

    return (0);
}

#endif /* XPV_HVM_DRIVER */
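
/*
 * Usage note for the XPV_HVM_DRIVER entry points above (illustrative;
 * the calling driver is hypothetical): a caller that has located an xdf
 * instance waits for the backend with xdf_hvm_connect(dip), which
 * returns 0 once the state reaches XD_READY, and may then describe the
 * virtual disk with xdf_hvm_setpgeom(dip, &geom), where geom.g_secsize
 * must be XB_BSIZE and geom.g_capacity must be non-zero and no larger
 * than the backend device.
 */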