/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/cmlb.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
#endif /* XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <io/xdf.h>

#define	FLUSH_DISKCACHE	0x1
#define	WRITE_BARRIER	0x2
#define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp) \
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp) \
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp) \
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp) \
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

static void *vbd_ss;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
int xdfdebug = 0;
extern int do_polled_io;
diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
int xdf_barrier_flush_disable = 0;

/*
 * dev_ops and cb_ops entrypoints
 */
static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
static int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
static int xdf_reset(dev_info_t *, ddi_reset_cmd_t);
static int xdf_open(dev_t *, int, int, cred_t *);
static int xdf_close(dev_t, int, int, struct cred *);
static int xdf_strategy(struct buf *);
static int xdf_read(dev_t, struct uio *, cred_t *);
static int xdf_aread(dev_t, struct aio_req *, cred_t *);
static int xdf_write(dev_t, struct uio *, cred_t *);
static int xdf_awrite(dev_t, struct aio_req *, cred_t *);
static int xdf_dump(dev_t, caddr_t, daddr_t, int);
static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static uint_t xdf_intr(caddr_t);
static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
	caddr_t, int *);

/*
 * misc private functions
 */
static int xdf_suspend(dev_info_t *);
static int xdf_resume(dev_info_t *);
static int xdf_start_connect(xdf_t *);
static int xdf_start_disconnect(xdf_t *);
static int xdf_post_connect(xdf_t *);
static void xdf_post_disconnect(xdf_t *);
static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *);
static void xdf_iostart(xdf_t *);
static void xdf_iofini(xdf_t *, uint64_t, int);
static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *);
static int xdf_drain_io(xdf_t *);
static boolean_t xdf_isopen(xdf_t *, int);
static int xdf_check_state_transition(xdf_t *, XenbusState);
static int xdf_connect(xdf_t *, boolean_t);
static int xdf_dmacallback(caddr_t);
static void xdf_timeout_handler(void *);
static uint_t xdf_iorestart(caddr_t);
static v_req_t *vreq_get(xdf_t *, buf_t *);
static void vreq_free(xdf_t *, v_req_t *);
static int vreq_setup(xdf_t *, v_req_t *);
static ge_slot_t *gs_get(xdf_t *, int);
static void gs_free(xdf_t *, ge_slot_t *);
static grant_ref_t gs_grant(ge_slot_t *, mfn_t);
static void unexpectedie(xdf_t *);
static void xdfmin(struct buf *);
static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);

#if defined(XPV_HVM_DRIVER)
static void xdf_hvm_add(dev_info_t *);
static void xdf_hvm_rm(dev_info_t *);
static void xdf_hvm_init(void);
static void xdf_hvm_fini(void);
#endif /* XPV_HVM_DRIVER */

static struct cb_ops xdf_cbops = {
	xdf_open,
	xdf_close,
	xdf_strategy,
	nodev,
	xdf_dump,
	xdf_read,
	xdf_write,
	xdf_ioctl,
	nodev,
	nodev,
	nodev,
	nochpoll,
	xdf_prop_op,
	NULL,
	D_MP | D_NEW | D_64BIT,
	CB_REV,
	xdf_aread,
	xdf_awrite
};

struct dev_ops xdf_devops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xdf_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdf_attach,		/* devo_attach */
	xdf_detach,		/* devo_detach */
	xdf_reset,		/* devo_reset */
	&xdf_cbops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"virtual block driver %I%",	/* short description */
	&xdf_devops		/* driver specific ops */
};

static struct modlinkage xdf_modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length */
	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
	XB_BSIZE,			/* granularity */
	0,				/* flags (reserved) */
};
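
/*
 * A single blkif request carries at most BLKIF_MAX_SEGMENTS_PER_REQUEST
 * segments of at most one page each, so one DMA window (and therefore one
 * request) can move no more than BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGESIZE
 * bytes; XB_MAX_XFER (from xdf.h) is assumed to be defined to match that
 * limit.
 */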

static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/* callbacks from common label */

int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};

int
_init(void)
{
	int rc;

	if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0)
		return (rc);

	xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
	    sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
	    sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_init();
#endif /* XPV_HVM_DRIVER */

	if ((rc = mod_install(&xdf_modlinkage)) != 0) {
#if defined(XPV_HVM_DRIVER)
		xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */
		kmem_cache_destroy(xdf_vreq_cache);
		kmem_cache_destroy(xdf_gs_cache);
		ddi_soft_state_fini(&vbd_ss);
		return (rc);
	}

	return (rc);
}

int
_fini(void)
{

	int err;
	if ((err = mod_remove(&xdf_modlinkage)) != 0)
		return (err);

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */

	kmem_cache_destroy(xdf_vreq_cache);
	kmem_cache_destroy(xdf_gs_cache);
	ddi_soft_state_fini(&vbd_ss);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdf_modlinkage, modinfop));
}

/*ARGSUSED*/
static int
xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
{
	int instance;
	xdf_t *vbdp;

	instance = XDF_INST(getminor((dev_t)arg));

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) {
			*rp = NULL;
			return (DDI_FAILURE);
		}
		*rp = vbdp->xdf_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*rp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}
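
/*
 * Property requests are forwarded to cmlb (which knows the current label
 * and partition layout) once the device is connected to the backend;
 * otherwise fall back to the generic ddi_prop_op().
 */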
static int
xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
	char *name, caddr_t valuep, int *lengthp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(dip))) == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_connect(vdp, B_TRUE) != XD_READY) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));
	}
	mutex_exit(&vdp->xdf_dev_lk);

	return (cmlb_prop_op(vdp->xdf_vd_lbl,
	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
	    XDF_PART(getminor(dev)), NULL));
}

static int
xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	xdf_t *vdp;
	ddi_iblock_cookie_t softibc;
	int instance;

	xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
	    "xdfdebug", 0);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (xdf_resume(devi));

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);
	ddi_set_driver_private(devi, vdp);
	vdp->xdf_dip = devi;
	cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);

	if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
	mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
	mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
	mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER,
	    (void *)vdp->xdf_ibc);

	if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
	if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
	    &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

#if !defined(XPV_HVM_DRIVER)
	/* create kstat for iostat(1M) */
	if (xdf_kstat_create(devi, "xdf", instance) != 0) {
		cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
#endif /* !XPV_HVM_DRIVER */

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

	/*
	 * Initialize the physical geometry structure.  Note that currently
	 * we don't know the size of the backend device so the number
	 * of blocks on the device will be initialized to zero.  Once
	 * we connect to the backend device we'll update the physical
	 * geometry to reflect the real size of the device.
	 */
	xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/*
	 * create default device minor nodes: non-removable disk
	 * we will adjust minor nodes after we are connected w/ backend
	 */
	cmlb_alloc_handle(&vdp->xdf_vd_lbl);
	if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1,
	    DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
	    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
	    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
	    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
	    vdp->xdf_vd_lbl, NULL) != 0) {
		cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

	/*
	 * We ship with cache-enabled disks
	 */
	vdp->xdf_wce = 1;

	mutex_enter(&vdp->xdf_cb_lk);

	/* Watch backend XenbusState change */
	if (xvdi_add_event_handler(devi, XS_OE_STATE,
	    xdf_oe_change) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout0;
	}

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: start connection failed",
		    ddi_get_name_addr(devi));
		(void) xdf_start_disconnect(vdp);
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout1;
	}

	mutex_exit(&vdp->xdf_cb_lk);

	list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
	    offsetof(v_req_t, v_link));
	list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t),
	    offsetof(ge_slot_t, link));

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_add(devi);

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1);

	/*
	 * Report our version to dom0.
	 */
	if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d",
	    HVMPV_XDF_VERS))
		cmn_err(CE_WARN, "xdf: couldn't write version\n");
#endif /* XPV_HVM_DRIVER */

	ddi_report_dev(devi);

	DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance));

	return (DDI_SUCCESS);

errout1:
	xvdi_remove_event_handler(devi, XS_OE_STATE);
errout0:
	if (vdp->xdf_vd_lbl != NULL) {
		cmlb_detach(vdp->xdf_vd_lbl, NULL);
		cmlb_free_handle(&vdp->xdf_vd_lbl);
		vdp->xdf_vd_lbl = NULL;
	}
#if !defined(XPV_HVM_DRIVER)
	xdf_kstat_delete(devi);
#endif /* !XPV_HVM_DRIVER */
	if (vdp->xdf_softintr_id != NULL)
		ddi_remove_softintr(vdp->xdf_softintr_id);
	if (vdp->xdf_ibc != NULL) {
		mutex_destroy(&vdp->xdf_cb_lk);
		mutex_destroy(&vdp->xdf_dev_lk);
	}
	cv_destroy(&vdp->xdf_dev_cv);
	ddi_soft_state_free(vbd_ss, instance);
	ddi_set_driver_private(devi, NULL);
	ddi_prop_remove_all(devi);
	cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi));
	return (DDI_FAILURE);
}

static int
xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	switch (cmd) {

	case DDI_PM_SUSPEND:
		break;

	case DDI_SUSPEND:
		return (xdf_suspend(devi));

	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);

	if (vdp == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_isopen(vdp, -1)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

	if (vdp->xdf_status != XD_CLOSED) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_rm(devi);
#endif /* XPV_HVM_DRIVER */
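
	/*
	 * The device is closed and no longer open to anyone at this point,
	 * so no DMA callback should still be outstanding.
	 */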
	ASSERT(!ISDMACBON(vdp));
	mutex_exit(&vdp->xdf_dev_lk);

	if (vdp->xdf_timeout_id != 0)
		(void) untimeout(vdp->xdf_timeout_id);

	xvdi_remove_event_handler(devi, XS_OE_STATE);

	/* we'll support backend running in domU later */
#ifdef	DOMU_BACKEND
	(void) xvdi_post_event(devi, XEN_HP_REMOVE);
#endif

	list_destroy(&vdp->xdf_vreq_act);
	list_destroy(&vdp->xdf_gs_act);
	ddi_prop_remove_all(devi);
	xdf_kstat_delete(devi);
	ddi_remove_softintr(vdp->xdf_softintr_id);
	ddi_set_driver_private(devi, NULL);
	cv_destroy(&vdp->xdf_dev_cv);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
	if (vdp->xdf_cache_flush_block != NULL)
		kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
	ddi_soft_state_free(vbd_ss, instance);
	return (DDI_SUCCESS);
}

static int
xdf_suspend(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;
	enum xdf_state st;

	instance = ddi_get_instance(devi);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: xdf#%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	xvdi_suspend(devi);

	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	st = vdp->xdf_status;
	/* change status to stop further I/O requests */
	if (st == XD_READY)
		vdp->xdf_status = XD_SUSPEND;
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	/* make sure no more I/O responses left in the ring buffer */
	if ((st == XD_INIT) || (st == XD_READY)) {
#ifdef XPV_HVM_DRIVER
		ec_unbind_evtchn(vdp->xdf_evtchn);
		xvdi_free_evtchn(devi);
#else /* !XPV_HVM_DRIVER */
		(void) ddi_remove_intr(devi, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
		(void) xdf_drain_io(vdp);
		/*
		 * no need to teardown the ring buffer here
		 * it will be simply re-init'ed during resume when
		 * we call xvdi_alloc_ring
		 */
	}

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: SUCCESS\n");

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_resume(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: xdf%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_cb_lk);

	if (xvdi_resume(devi) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(vdp->xdf_status != XD_READY);
	vdp->xdf_status = XD_UNKNOWN;
	mutex_exit(&vdp->xdf_dev_lk);

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_exit(&vdp->xdf_cb_lk);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: done\n");
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance));
	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	/*
	 * wait for any outstanding I/O to complete
	 */
	(void) xdf_drain_io(vdp);

	DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance));
	return (DDI_SUCCESS);
}
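
/*
 * Open-state bookkeeping: xdf_vd_open[] holds a per-otyp bitmask of open
 * partitions, xdf_vd_lyropen[] counts layered (OTYP_LYR) opens of each
 * partition, and xdf_vd_exclopen marks partitions opened with FEXCL.
 */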
static int
xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t	minor;
	xdf_t	*vdp;
	int part;
	ulong_t parbit;
	diskaddr_t p_blkct = 0;
	boolean_t firstopen;
	boolean_t nodelay;

	minor = getminor(*devp);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	nodelay = (flag & (FNDELAY | FNONBLOCK));

	DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor)));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}

	if ((flag & FWRITE) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EROFS);
	}

	part = XDF_PART(minor);
	parbit = 1 << part;
	if ((vdp->xdf_vd_exclopen & parbit) ||
	    ((flag & FEXCL) && xdf_isopen(vdp, part))) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EBUSY);
	}

	/* are we the first one to open this node? */
	firstopen = !xdf_isopen(vdp, -1);

	if (otyp == OTYP_LYR)
		vdp->xdf_vd_lyropen[part]++;

	vdp->xdf_vd_open[otyp] |= parbit;

	if (flag & FEXCL)
		vdp->xdf_vd_exclopen |= parbit;

	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	if (firstopen)
		cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	/*
	 * check size
	 * ignore CD/DVD which contains a zero-sized s0
	 */
	if (!nodelay && !XD_IS_CD(vdp) &&
	    ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
	    NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) {
		(void) xdf_close(*devp, flag, otyp, credp);
		return (ENXIO);
	}

	return (0);
}

/*ARGSUSED*/
static int
xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t	minor;
	xdf_t	*vdp;
	int part;
	ulong_t parbit;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}
	parbit = 1 << part;

	ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
	if (otyp == OTYP_LYR) {
		ASSERT(vdp->xdf_vd_lyropen[part] > 0);
		if (--vdp->xdf_vd_lyropen[part] == 0)
			vdp->xdf_vd_open[otyp] &= ~parbit;
	} else {
		vdp->xdf_vd_open[otyp] &= ~parbit;
	}
	vdp->xdf_vd_exclopen &= ~parbit;

	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_strategy(struct buf *bp)
{
	xdf_t	*vdp;
	minor_t minor;
	diskaddr_t p_blkct, p_blkst;
	ulong_t nblks;
	int part;

	minor = getminor(bp->b_edev);
	part = XDF_PART(minor);

	vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor));
	if ((vdp == NULL) || !xdf_isopen(vdp, part)) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Check for writes to a read only device */
	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
		bioerror(bp, EROFS);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Check if this I/O is accessing a partition or the entire disk */
	if ((long)bp->b_private == XB_SLICE_NONE) {
		/* This I/O is using an absolute offset */
		p_blkct = vdp->xdf_xdev_nblocks;
		p_blkst = 0;
	} else {
		/* This I/O is using a partition relative offset */
		if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
		    &p_blkst, NULL, NULL, NULL)) {
			bioerror(bp, ENXIO);
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (0);
		}
	}

	/* check for a starting block beyond the disk or partition limit */
	if (bp->b_blkno > p_blkct) {
		DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64,
		    (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
		bioerror(bp, EINVAL);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Legacy: don't set error flag in this case */
	if (bp->b_blkno == p_blkct) {
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Adjust for partial transfer */
	nblks = bp->b_bcount >> XB_BSHIFT;
	if ((bp->b_blkno + nblks) > p_blkct) {
		bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
		bp->b_bcount -= bp->b_resid;
	}

	DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n",
	    (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));

	/* Fix up the buf struct */
	bp->b_flags |= B_BUSY;
	bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */
	bp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);
	if (do_polled_io)
		(void) xdf_drain_io(vdp);
	return (0);
}

/*ARGSUSED*/
static int
xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{

	xdf_t	*vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
}

/*ARGSUSED*/
static int
xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop));
}

/*ARGSUSED*/
static int
xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t	*vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop));
}

/*ARGSUSED*/
static int
xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop));
}

static int
xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	struct buf dumpbuf, *dbp;
	xdf_t	*vdp;
	minor_t minor;
	int err = 0;
	int part;
	diskaddr_t p_blkcnt, p_blkst;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n",
	    addr, blkno, nblk));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
	    NULL, NULL, NULL))
		return (ENXIO);

	if ((blkno + nblk) > p_blkcnt) {
		cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64,
		    blkno + nblk, (uint64_t)p_blkcnt);
		return (EINVAL);
	}

	dbp = &dumpbuf;
	bioinit(dbp);
	dbp->b_flags = B_BUSY;
	dbp->b_un.b_addr = addr;
	dbp->b_bcount = nblk << DEV_BSHIFT;
	dbp->b_blkno = blkno;
	dbp->b_edev = dev;
	dbp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = dbp;
	} else {
		vdp->xdf_l_act->av_forw = dbp;
		vdp->xdf_l_act = dbp;
	}
	dbp->av_forw = NULL;
	dbp->av_back = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = xdf_drain_io(vdp);
	biofini(dbp);
	return (err);
}

/*ARGSUSED*/
static int
xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int instance;
	xdf_t	*vdp;
	minor_t minor;
	int part;

	minor = getminor(dev);
	instance = XDF_INST(minor);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (ENXIO);

	DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n",
	    instance, cmd, cmd));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo	media_info;

		media_info.dki_lbsize = DEV_BSIZE;
		media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
		media_info.dki_media_type = DK_FIXED_DISK;

		if (ddi_copyout(&media_info, (void *)arg,
		    sizeof (struct dk_minfo), mode)) {
			return (EFAULT);
		} else {
			return (0);
		}
	}

	case DKIOCINFO: {
		struct dk_cinfo info;

		/* controller information */
		if (XD_IS_CD(vdp))
			info.dki_ctype = DKC_CDROM;
		else
			info.dki_ctype = DKC_VBD;

		info.dki_cnum = 0;
		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);

		/* unit information */
		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
		info.dki_flags = DKI_FMTVOL;
		info.dki_partition = part;
		info.dki_maxtransfer = maxphys / DEV_BSIZE;
		info.dki_addr = 0;
		info.dki_space = 0;
		info.dki_prio = 0;
		info.dki_vec = 0;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
			return (EFAULT);
		else
			return (0);
	}

	case DKIOCSTATE: {
		enum dkio_state	dkstate = DKIO_INSERTED;
		if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate),
		    mode) != 0)
			return (EFAULT);
		return (0);
	}

	/*
	 * is media removable?
	 */
	case DKIOCREMOVABLE: {
		int i = XD_IS_RM(vdp) ? 1 : 0;
		if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode))
			return (EFAULT);
		return (0);
	}

	case DKIOCG_PHYGEOM:
	case DKIOCG_VIRTGEOM:
	case DKIOCGGEOM:
	case DKIOCSGEOM:
	case DKIOCGAPART:
	case DKIOCSAPART:
	case DKIOCGVTOC:
	case DKIOCSVTOC:
	case DKIOCPARTINFO:
	case DKIOCGMBOOT:
	case DKIOCSMBOOT:
	case DKIOCGETEFI:
	case DKIOCSETEFI:
	case DKIOCPARTITION: {
		int rc;

		rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
		    rvalp, NULL);
		return (rc);
	}

	case DKIOCGETWCE:
		if (ddi_copyout(&vdp->xdf_wce, (void *)arg,
		    sizeof (vdp->xdf_wce), mode))
			return (EFAULT);
		return (0);
	case DKIOCSETWCE:
		if (ddi_copyin((void *)arg, &vdp->xdf_wce,
		    sizeof (vdp->xdf_wce), mode))
			return (EFAULT);
		return (0);
	case DKIOCFLUSHWRITECACHE: {
		int rc;
		struct dk_callback *dkc = (struct dk_callback *)arg;

		if (vdp->xdf_flush_supported) {
			rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    NULL, 0, 0, (void *)dev);
		} else if (vdp->xdf_feature_barrier &&
		    !xdf_barrier_flush_disable) {
			rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    vdp->xdf_cache_flush_block, xdf_flush_block,
			    DEV_BSIZE, (void *)dev);
		} else {
			return (ENOTTY);
		}
		if ((mode & FKIOCTL) && (dkc != NULL) &&
		    (dkc->dkc_callback != NULL)) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, rc);
			/* need to return 0 after calling callback */
			rc = 0;
		}
		return (rc);
	}

	default:
		return (ENOTTY);
	}
}

/*
 * xdf interrupt handler
 */
static uint_t
xdf_intr(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	xendev_ring_t *xbr;
	blkif_response_t *resp;
	int bioerr;
	uint64_t id;
	extern int do_polled_io;
	uint8_t op;
	uint16_t status;
	ddi_acc_handle_t acchdl;

	mutex_enter(&vdp->xdf_dev_lk);

	if ((xbr = vdp->xdf_xb_ring) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_INTR_UNCLAIMED);
	}

	acchdl = vdp->xdf_xb_ring_hdl;

	/*
	 * complete all requests which have a response
	 */
	while (resp = xvdi_ring_get_response(xbr)) {
		id = ddi_get64(acchdl, &resp->id);
		op = ddi_get8(acchdl, &resp->operation);
		status = ddi_get16(acchdl, (uint16_t *)&resp->status);
		DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
		    op, id, status));

		/*
		 * XXPV - close connection to the backend and restart
		 */
		if (status != BLKIF_RSP_OKAY) {
("xdf@%s: I/O error while %s", 1242 ddi_get_name_addr(vdp->xdf_dip), 1243 (op == BLKIF_OP_READ) ? "reading" : "writing")); 1244 bioerr = EIO; 1245 } else { 1246 bioerr = 0; 1247 } 1248 1249 xdf_iofini(vdp, id, bioerr); 1250 } 1251 1252 mutex_exit(&vdp->xdf_dev_lk); 1253 1254 if (!do_polled_io) 1255 xdf_iostart(vdp); 1256 1257 return (DDI_INTR_CLAIMED); 1258 } 1259 1260 int xdf_fbrewrites; /* how many times was our flush block rewritten */ 1261 1262 /* 1263 * Snarf new data if our flush block was re-written 1264 */ 1265 static void 1266 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 1267 { 1268 int nblks; 1269 boolean_t mapin; 1270 1271 if (IS_WRITE_BARRIER(vdp, bp)) 1272 return; /* write was a flush write */ 1273 1274 mapin = B_FALSE; 1275 nblks = bp->b_bcount >> DEV_BSHIFT; 1276 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 1277 xdf_fbrewrites++; 1278 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 1279 mapin = B_TRUE; 1280 bp_mapin(bp); 1281 } 1282 bcopy(bp->b_un.b_addr + 1283 ((xdf_flush_block - blkno) << DEV_BSHIFT), 1284 vdp->xdf_cache_flush_block, DEV_BSIZE); 1285 if (mapin) 1286 bp_mapout(bp); 1287 } 1288 } 1289 1290 static void 1291 xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) 1292 { 1293 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 1294 v_req_t *vreq = gs->vreq; 1295 buf_t *bp = vreq->v_buf; 1296 1297 gs_free(vdp, gs); 1298 if (bioerr) 1299 bioerror(bp, bioerr); 1300 vreq->v_nslots--; 1301 if (vreq->v_nslots != 0) 1302 return; 1303 1304 XDF_UPDATE_IO_STAT(vdp, bp); 1305 if (vdp->xdf_xdev_iostat != NULL) 1306 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 1307 1308 if (IS_ERROR(bp)) 1309 bp->b_resid = bp->b_bcount; 1310 1311 vreq_free(vdp, vreq); 1312 biodone(bp); 1313 } 1314 1315 /* 1316 * return value of xdf_prepare_rreq() 1317 * used in xdf_iostart() 1318 */ 1319 #define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ 1320 #define XF_COMP 1 /* no more I/O left in buf */ 1321 1322 static void 1323 xdf_iostart(xdf_t *vdp) 1324 { 1325 xendev_ring_t *xbr; 1326 struct buf *bp; 1327 blkif_request_t *rreq; 1328 int retval; 1329 int rreqready = 0; 1330 1331 xbr = vdp->xdf_xb_ring; 1332 1333 /* 1334 * populate the ring request(s) 1335 * 1336 * loop until there is no buf to transfer or no free slot 1337 * available in I/O ring 1338 */ 1339 mutex_enter(&vdp->xdf_dev_lk); 1340 1341 for (;;) { 1342 if (vdp->xdf_status != XD_READY) 1343 break; 1344 1345 /* active buf queue empty? 

		/* active buf queue empty? */
		if ((bp = vdp->xdf_f_act) == NULL)
			break;

		/* try to grab a vreq for this bp */
		if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL))
			break;
		/* alloc DMA/GTE resources */
		if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS)
			break;

		/* get next blkif_request in the ring */
		if ((rreq = xvdi_ring_get_request(xbr)) == NULL)
			break;
		bzero(rreq, sizeof (blkif_request_t));

		/* populate blkif_request with this buf */
		rreqready++;
		retval = xdf_prepare_rreq(vdp, bp, rreq);
		if (retval == XF_COMP) {
			/* finish this bp, switch to next one */
			if (vdp->xdf_xdev_iostat != NULL)
				kstat_waitq_to_runq(
				    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
			vdp->xdf_f_act = bp->av_forw;
			bp->av_forw = NULL;
		}
	}

	/*
	 * Send the request(s) to the backend
	 */
	if (rreqready) {
		if (xvdi_ring_push_request(xbr)) {
			DPRINTF(IO_DBG, ("xdf_iostart: "
			    "sent request(s) to backend\n"));
			xvdi_notify_oe(vdp->xdf_dip);
		}
	}

	mutex_exit(&vdp->xdf_dev_lk);
}

/*
 * populate a single blkif_request_t w/ a buf
 */
static int
xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
	int		rval;
	grant_ref_t	gr;
	uint8_t		fsect, lsect;
	size_t		bcnt;
	paddr_t		dma_addr;
	off_t		blk_off;
	dev_info_t	*dip = vdp->xdf_dip;
	blkif_vdev_t	vdev = xvdi_get_vdevnum(dip);
	v_req_t		*vreq = BP2VREQ(bp);
	uint64_t	blkno = vreq->v_blkno;
	uint_t		ndmacs = vreq->v_ndmacs;
	ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
	int		seg = 0;
	int		isread = IS_READ(bp);

	if (isread)
		ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
	else {
		switch (vreq->v_flush_diskcache) {
		case FLUSH_DISKCACHE:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_FLUSH_DISKCACHE);
			ddi_put16(acchdl, &rreq->handle, vdev);
			ddi_put64(acchdl, &rreq->id,
			    (uint64_t)(uintptr_t)(vreq->v_gs));
			ddi_put8(acchdl, &rreq->nr_segments, 0);
			return (XF_COMP);
		case WRITE_BARRIER:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_WRITE_BARRIER);
			break;
		default:
			if (!vdp->xdf_wce)
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE_BARRIER);
			else
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE);
			break;
		}
	}

	ddi_put16(acchdl, &rreq->handle, vdev);
	ddi_put64(acchdl, &rreq->sector_number, blkno);
	ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs));

	/*
	 * loop until all segments are populated or no more dma cookie in buf
	 */
	for (;;) {
		/*
		 * Each segment of a blkif request can transfer up to
		 * one 4K page of data.
		 */
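		/*
		 * For example, assuming 512-byte sectors within the 4K
		 * page (XB_BSIZE == DEV_BSIZE): a 2K cookie whose page
		 * offset is 0x600 gives blk_off = 0x600, fsect = 3 and
		 * lsect = 3 + (2048 >> XB_BSHIFT) - 1 = 6.
		 */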
		bcnt = vreq->v_dmac.dmac_size;
		ASSERT(bcnt <= PAGESIZE);
		ASSERT((bcnt % XB_BSIZE) == 0);
		dma_addr = vreq->v_dmac.dmac_laddress;
		blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
		ASSERT((blk_off & XB_BMASK) == 0);
		fsect = blk_off >> XB_BSHIFT;
		lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
		ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
		    lsect < XB_MAX_SEGLEN / XB_BSIZE);
		DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n",
		    seg, vreq->v_dmac.dmac_size, blk_off));
		gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT);
		ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
		ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
		ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
		DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64
		    "\n", seg, fsect, lsect, gr, dma_addr));

		blkno += (bcnt >> XB_BSHIFT);
		seg++;
		ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		if (--ndmacs) {
			ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
			continue;
		}

		vreq->v_status = VREQ_DMAWIN_DONE;
		vreq->v_blkno = blkno;
		if (vreq->v_dmaw + 1 == vreq->v_ndmaws)
			/* last win */
			rval = XF_COMP;
		else
			rval = XF_PARTIAL;
		break;
	}
	ddi_put8(acchdl, &rreq->nr_segments, seg);
	DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n",
	    rreq->id));

	return (rval);
}

#define	XDF_QSEC	50000	/* .05 second */
#define	XDF_POLLCNT	12	/* loop up to 12 times before timing out */

static int
xdf_drain_io(xdf_t *vdp)
{
	int pollc, rval;
	xendev_ring_t *xbr;

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_drain_io: start\n");

	mutex_enter(&vdp->xdf_dev_lk);

	if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND))
		goto out;

	rval = 0;
	xbr = vdp->xdf_xb_ring;
	ASSERT(xbr != NULL);

	for (pollc = 0; pollc < XDF_POLLCNT; pollc++) {
		if (xvdi_ring_has_unconsumed_responses(xbr)) {
			mutex_exit(&vdp->xdf_dev_lk);
			(void) xdf_intr((caddr_t)vdp);
			mutex_enter(&vdp->xdf_dev_lk);
		}
		if (!xvdi_ring_has_incomp_request(xbr))
			goto out;

#ifndef	XPV_HVM_DRIVER
		(void) HYPERVISOR_yield();
#endif /* XPV_HVM_DRIVER */
		/*
		 * file-backed devices can be slow
		 */
		drv_usecwait(XDF_QSEC << pollc);
	}
	cmn_err(CE_WARN, "xdf_polled_io: timeout");
	rval = EIO;
out:
	mutex_exit(&vdp->xdf_dev_lk);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_drain_io: end, err=%d\n", rval);
	return (rval);
}

/* ARGSUSED5 */
int
xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp,
    diskaddr_t start, size_t reqlen, void *tg_cookie)
{
	xdf_t *vdp;
	struct buf *bp;
	int err = 0;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
	if (vdp == NULL)
		return (ENXIO);

	if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
		return (EINVAL);

	bp = getrbuf(KM_SLEEP);
	if (cmd == TG_READ)
		bp->b_flags = B_BUSY | B_READ;
	else
		bp->b_flags = B_BUSY | B_WRITE;
	bp->b_un.b_addr = bufp;
	bp->b_bcount = reqlen;
	bp->b_blkno = start;
	bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = biowait(bp);

	ASSERT(bp->b_flags & B_DONE);

	freerbuf(bp);
	return (err);
}

/*
 * synthetic geometry
 */
#define	XDF_NSECTS	256
#define	XDF_NHEADS	16

static void
xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	xdf_t *vdp;
	uint_t ncyl;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));

	ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);

	geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
	geomp->g_acyl = 0;
	geomp->g_nhead = XDF_NHEADS;
	geomp->g_secsize = XB_BSIZE;
	geomp->g_nsect = XDF_NSECTS;
	geomp->g_intrlv = 0;
	geomp->g_rpm = 7200;
	geomp->g_capacity = vdp->xdf_xdev_nblocks;
}

static int
xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp)
{
	xdf_t *vdp;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));

	if (vdp == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	*capp = vdp->xdf_pgeom.g_capacity;
	DPRINTF(LBL_DBG, ("capacity %llu\n", *capp));
	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL)
		return (ENXIO);
	*geomp = vdp->xdf_pgeom;
	return (0);
}

/*
 * No real HBA, no geometry available from it
 */
/*ARGSUSED*/
static int
xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	return (EINVAL);
}

static int
xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep)
{
	xdf_t *vdp;

	if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))))
		return (ENXIO);

	if (XD_IS_RO(vdp))
		tgattributep->media_is_writable = 0;
	else
		tgattributep->media_is_writable = 1;
	return (0);
}

/* ARGSUSED3 */
int
xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
{
	switch (cmd) {
	case TG_GETPHYGEOM:
		return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg));
	case TG_GETVIRTGEOM:
		return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg));
	case TG_GETCAPACITY:
		return (xdf_lb_getcap(devi, (diskaddr_t *)arg));
	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = XB_BSIZE;
		return (0);
	case TG_GETATTR:
		return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg));
	default:
		return (ENOTTY);
	}
}

/*
 * Kick-off connect process
 * Status should be XD_UNKNOWN or XD_CLOSED
 * On success, status will be changed to XD_INIT
 * On error, status won't be changed
 */
static int
xdf_start_connect(xdf_t *vdp)
{
	char *xsnode;
	grant_ref_t gref;
	xenbus_transaction_t xbt;
	int rv;
	dev_info_t *dip = vdp->xdf_dip;

	if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1)
		goto errout;

	if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel",
		    ddi_get_name_addr(dip));
		goto errout;
	}
	vdp->xdf_evtchn = xvdi_get_evtchn(dip);
#ifdef XPV_HVM_DRIVER
	ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
#else /* !XPV_HVM_DRIVER */
	if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: "
		    "failed to add intr handler", ddi_get_name_addr(dip));
		goto errout1;
	}
#endif /* !XPV_HVM_DRIVER */

	if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
	    sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
		    ddi_get_name_addr(dip));
		goto errout2;
	}
	vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */

	/*
	 * Write into xenstore the info needed by backend
	 */
	if ((xsnode = xvdi_get_xsname(dip)) == NULL) {
		cmn_err(CE_WARN, "xdf@%s: "
		    "failed to get xenstore node path",
		    ddi_get_name_addr(dip));
		goto fail_trans;
	}
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, EIO, "transaction start");
		goto fail_trans;
	}

	if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing ring-ref");
		goto abort_trans;
	}

	if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u",
	    vdp->xdf_evtchn)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write event-channel",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing event-channel");
		goto abort_trans;
	}

	/*
	 * "protocol" is written by the domain builder in the case of PV
	 * domains.  However, it is not written for HVM domains, so let's
	 * write it here.
	 */
	if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s",
	    XEN_IO_PROTO_ABI_NATIVE)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write protocol",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing protocol");
		goto abort_trans;
	}

	if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) {
		cmn_err(CE_WARN, "xdf@%s: "
		    "failed to switch state to XenbusStateInitialised",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing state");
		goto abort_trans;
	}

	/* kick-off connect process */
	if (rv = xenbus_transaction_end(xbt, 0)) {
		if (rv == EAGAIN)
			goto trans_retry;
		cmn_err(CE_WARN, "xdf@%s: failed to end transaction",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "completing transaction");
		goto fail_trans;
	}

	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_INIT;
	mutex_exit(&vdp->xdf_dev_lk);

	return (DDI_SUCCESS);

abort_trans:
	(void) xenbus_transaction_end(xbt, 1);
fail_trans:
	xvdi_free_ring(vdp->xdf_xb_ring);
errout2:
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
errout1:
	xvdi_free_evtchn(dip);
errout:
	cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting",
	    ddi_get_name_addr(dip));
	return (DDI_FAILURE);
}

/*
 * Kick-off disconnect process
 * Status won't be changed
 */
static int
xdf_start_disconnect(xdf_t *vdp)
{
	if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) {
		cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting",
		    ddi_get_name_addr(vdp->xdf_dip));
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
xdf_get_flush_block(xdf_t *vdp)
{
	/*
	 * Get a DEV_BSIZE aligned buffer
	 */
	vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP);
	vdp->xdf_cache_flush_block =
	    (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE);
	if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
	    xdf_flush_block, DEV_BSIZE, NULL) != 0)
		return (DDI_FAILURE);
	return (DDI_SUCCESS);
}

/*
 * Finish other initialization after we've connected to backend
 * Status should be XD_INIT before calling this routine
 * On success, status should be changed to XD_READY
 * On error, status should stay XD_INIT
 */
static int
xdf_post_connect(xdf_t *vdp)
{
	int rv;
	uint_t len;
	char *type;
	char *barrier;
	dev_info_t *devi = vdp->xdf_dip;

	/*
	 * Determine if feature barrier is supported by backend
	 */
	if (xenbus_read(XBT_NULL, xvdi_get_oename(devi),
	    "feature-barrier", (void **)&barrier, &len) == 0) {
		vdp->xdf_feature_barrier = 1;
		kmem_free(barrier, len);
	} else {
		cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier",
		    ddi_get_name_addr(vdp->xdf_dip));
		vdp->xdf_feature_barrier = 0;
	}

	/* probe backend */
	if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi),
	    "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks,
	    "info", "%u", &vdp->xdf_xdev_info, NULL)) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "cannot read backend info", ddi_get_name_addr(devi));
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the device we're connecting isn't smaller than
	 * the old connected device.
	 */
	if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "backend disk device shrank", ddi_get_name_addr(devi));
		/* XXX: call xvdi_fatal_error() here? */
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Only update the physical geometry to reflect the new device
	 * size if this is the first time we're connecting to the backend
	 * device.  Once we assign a physical geometry to a device it stays
	 * fixed until:
	 *	- we get detached and re-attached (at which point we
	 *	  automatically assign a new physical geometry).
	 *	- someone calls TG_SETPHYGEOM to explicitly set the
	 *	  physical geometry.
	 */
	if (vdp->xdf_pgeom.g_capacity == 0)
		xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/* fix disk type */
	if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type",
	    (void **)&type, &len) != 0) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "cannot read device-type", ddi_get_name_addr(devi));
		xvdi_fatal_error(devi, rv, "reading device-type");
		return (DDI_FAILURE);
	}
	if (strcmp(type, "cdrom") == 0)
		vdp->xdf_xdev_info |= VDISK_CDROM;
	kmem_free(type, len);

	/*
	 * We've created all the minor nodes via cmlb_attach() using default
	 * values in xdf_attach() to make it possible to block in xdf_open(),
	 * in case there's anyone (say, the booting thread) ever trying to
	 * open it before we're connected to the backend.  We will refresh
	 * all those minor nodes w/ the latest info we've got now that we
	 * are almost connected.
	 *
	 * Don't do this when xdf is already opened by someone (could happen
	 * during resume), because cmlb_attach() would invalidate the label
	 * info and confuse anyone who already has the node open.
	 */
	if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) {
		/* re-init cmlb w/ latest info we got from backend */
		if (cmlb_attach(devi, &xdf_lb_ops,
		    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
		    XD_IS_RM(vdp), 1,
		    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
		    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
		    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
		    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
		    vdp->xdf_vd_lbl, NULL) != 0) {
			cmn_err(CE_WARN, "xdf@%s: cmlb attach failed",
			    ddi_get_name_addr(devi));
			return (DDI_FAILURE);
		}
	}

	/* mark vbd is ready for I/O */
	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_READY;
	mutex_exit(&vdp->xdf_dev_lk);
	/*
	 * If backend has feature-barrier, see if it supports disk
	 * cache flush op.
	 */
	vdp->xdf_flush_supported = 0;
	if (vdp->xdf_feature_barrier) {
		/*
		 * Pretend we already know flush is supported so probe
		 * will attempt the correct op.
		 */
		vdp->xdf_flush_supported = 1;
		if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
			vdp->xdf_flush_supported = 1;
		} else {
			vdp->xdf_flush_supported = 0;
			/*
			 * If the other end does not support the cache flush op
			 * then we must use a barrier-write to force disk
			 * cache flushing.  Barrier writes require that a data
			 * block actually be written.
			 * Cache a block to barrier-write when we are
			 * asked to perform a flush.
			 * XXX - would it be better to just copy 1 block
			 * (512 bytes) from whatever write we did last
			 * and rewrite that block?
			 */
			if (xdf_get_flush_block(vdp) != DDI_SUCCESS)
				return (DDI_FAILURE);
		}
	}

	cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi),
	    (uint64_t)vdp->xdf_xdev_nblocks);

	return (DDI_SUCCESS);
}

/*
 * Finish other uninitialization after we've disconnected from backend
 * when status is XD_CLOSING or XD_INIT.  After returns, status is XD_CLOSED
 */
static void
xdf_post_disconnect(xdf_t *vdp)
{
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
	xvdi_free_evtchn(vdp->xdf_dip);
	xvdi_free_ring(vdp->xdf_xb_ring);
	vdp->xdf_xb_ring = NULL;
	vdp->xdf_xb_ring_hdl = NULL;
	vdp->xdf_peer = (domid_t)-1;

	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_CLOSED;
	mutex_exit(&vdp->xdf_dev_lk);
}

/*ARGSUSED*/
static void
xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	boolean_t unexpect_die = B_FALSE;
	int status;

	DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
	    ddi_get_name_addr(dip), new_state));

	mutex_enter(&vdp->xdf_cb_lk);

	if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) {
		mutex_exit(&vdp->xdf_cb_lk);
		return;
	}

	switch (new_state) {
	case XenbusStateInitialising:
		ASSERT(vdp->xdf_status == XD_CLOSED);
		/*
		 * backend recovered from a previous failure,
		 * kick-off connect process again
		 */
		if (xdf_start_connect(vdp) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "xdf@%s:"
			    " failed to start reconnecting to backend",
			    ddi_get_name_addr(dip));
		}
		break;
	case XenbusStateConnected:
		ASSERT(vdp->xdf_status == XD_INIT);
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
		/* finish final init after connect */
		if (xdf_post_connect(vdp) != DDI_SUCCESS)
			(void) xdf_start_disconnect(vdp);
		break;
	case XenbusStateClosing:
		if (vdp->xdf_status == XD_READY) {
			mutex_enter(&vdp->xdf_dev_lk);
			if (xdf_isopen(vdp, -1)) {
				cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, "
				    "still in use", ddi_get_name_addr(dip));
				mutex_exit(&vdp->xdf_dev_lk);
				break;
			} else {
				vdp->xdf_status = XD_CLOSING;
			}
			mutex_exit(&vdp->xdf_dev_lk);
		}
		(void) xdf_start_disconnect(vdp);
		break;
	case XenbusStateClosed:
		/* first check if BE closed unexpectedly */
		mutex_enter(&vdp->xdf_dev_lk);
		if (xdf_isopen(vdp, -1)) {
			unexpect_die = B_TRUE;
			unexpectedie(vdp);
"xdf@%s: backend closed, " 2078 "reconnecting...", ddi_get_name_addr(dip)); 2079 } 2080 mutex_exit(&vdp->xdf_dev_lk); 2081 2082 if (vdp->xdf_status == XD_READY) { 2083 mutex_enter(&vdp->xdf_dev_lk); 2084 vdp->xdf_status = XD_CLOSING; 2085 mutex_exit(&vdp->xdf_dev_lk); 2086 2087 #ifdef DOMU_BACKEND 2088 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 2089 #endif 2090 2091 xdf_post_disconnect(vdp); 2092 (void) xvdi_switch_state(dip, XBT_NULL, 2093 XenbusStateClosed); 2094 } else if ((vdp->xdf_status == XD_INIT) || 2095 (vdp->xdf_status == XD_CLOSING)) { 2096 xdf_post_disconnect(vdp); 2097 } else { 2098 mutex_enter(&vdp->xdf_dev_lk); 2099 vdp->xdf_status = XD_CLOSED; 2100 mutex_exit(&vdp->xdf_dev_lk); 2101 } 2102 } 2103 2104 /* notify anybody waiting for oe state change */ 2105 mutex_enter(&vdp->xdf_dev_lk); 2106 cv_broadcast(&vdp->xdf_dev_cv); 2107 mutex_exit(&vdp->xdf_dev_lk); 2108 2109 status = vdp->xdf_status; 2110 mutex_exit(&vdp->xdf_cb_lk); 2111 2112 if (status == XD_READY) { 2113 xdf_iostart(vdp); 2114 } else if ((status == XD_CLOSED) && !unexpect_die) { 2115 /* interface is closed successfully, remove all minor nodes */ 2116 if (vdp->xdf_vd_lbl != NULL) { 2117 cmlb_detach(vdp->xdf_vd_lbl, NULL); 2118 cmlb_free_handle(&vdp->xdf_vd_lbl); 2119 vdp->xdf_vd_lbl = NULL; 2120 } 2121 } 2122 } 2123 2124 /* check if partition is open, -1 - check all partitions on the disk */ 2125 static boolean_t 2126 xdf_isopen(xdf_t *vdp, int partition) 2127 { 2128 int i; 2129 ulong_t parbit; 2130 boolean_t rval = B_FALSE; 2131 2132 ASSERT((partition == -1) || 2133 ((partition >= 0) || (partition < XDF_PEXT))); 2134 2135 if (partition == -1) 2136 parbit = (ulong_t)-1; 2137 else 2138 parbit = 1 << partition; 2139 2140 for (i = 0; i < OTYPCNT; i++) { 2141 if (vdp->xdf_vd_open[i] & parbit) 2142 rval = B_TRUE; 2143 } 2144 2145 return (rval); 2146 } 2147 2148 /* 2149 * Xdf_check_state_transition will check the XenbusState change to see 2150 * if the change is a valid transition or not. 

/*
 * xdf_check_state_transition() checks whether the XenbusState change is a
 * valid transition.  The new state is written by the backend domain, or by
 * running xenstore-write to change it manually in dom0.
 */
static int
xdf_check_state_transition(xdf_t *vdp, XenbusState oestate)
{
	int status;
	int stcheck;
#define	STOK	0 /* needs further processing */
#define	STNOP	1 /* no action needs to be taken */
#define	STBUG	2 /* unexpected state change, could be a bug */

	status = vdp->xdf_status;
	stcheck = STOK;

	switch (status) {
	case XD_UNKNOWN:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateConnected))
			stcheck = STBUG;
		else if ((oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STNOP;
		break;
	case XD_INIT:
		if (oestate == XenbusStateUnknown)
			stcheck = STBUG;
		else if ((oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STNOP;
		break;
	case XD_READY:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STBUG;
		else if (oestate == XenbusStateConnected)
			stcheck = STNOP;
		break;
	case XD_CLOSING:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised) ||
		    (oestate == XenbusStateConnected))
			stcheck = STBUG;
		else if (oestate == XenbusStateClosing)
			stcheck = STNOP;
		break;
	case XD_CLOSED:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateConnected))
			stcheck = STBUG;
		else if ((oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised) ||
		    (oestate == XenbusStateClosing) ||
		    (oestate == XenbusStateClosed))
			stcheck = STNOP;
		break;
	case XD_SUSPEND:
	default:
		stcheck = STBUG;
	}

	if (stcheck == STOK)
		return (DDI_SUCCESS);

	if (stcheck == STBUG)
		cmn_err(CE_NOTE, "xdf@%s: unexpected otherend "
		    "state change to %d when status is %d",
		    ddi_get_name_addr(vdp->xdf_dip), oestate, status);

	return (DDI_FAILURE);
}

static int
xdf_connect(xdf_t *vdp, boolean_t wait)
{
	ASSERT(mutex_owned(&vdp->xdf_dev_lk));
	while (vdp->xdf_status != XD_READY) {
		if (!wait || (vdp->xdf_status > XD_READY))
			break;

		if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
			break;
	}

	return (vdp->xdf_status);
}
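
/*
 * xdf_connect() is called with xdf_dev_lk held and, when 'wait' is set,
 * blocks on xdf_dev_cv (signalled from xdf_oe_change()) until the device
 * either reaches XD_READY or moves past it.  A typical caller would look
 * roughly like this (illustrative sketch only, not a verbatim copy of any
 * xdf entry point):
 *
 *	mutex_enter(&vdp->xdf_dev_lk);
 *	if (xdf_connect(vdp, B_TRUE) != XD_READY) {
 *		mutex_exit(&vdp->xdf_dev_lk);
 *		return (ENXIO);
 *	}
 *	... proceed now that the vbd is ready ...
 *	mutex_exit(&vdp->xdf_dev_lk);
 */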

/*
 * Callback invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	ASSERT(vdp != NULL);

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    ddi_get_name_addr(vdp->xdf_dip)));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	ASSERT(vdp != NULL);

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(ISDMACBON(vdp));
	SETDMACBOFF(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* a new timeout could be re-scheduled from here */
	xdf_iostart(vdp);
}

/*
 * Allocate a vreq for this bp.
 * On return, bp->av_back points to the vreq.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP2VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
	bp->av_back = (buf_t *)vreq;
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	/* initialization of other fields in vreq is up to the caller */

	return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t *bp = vreq->v_buf;

	list_remove(&vdp->xdf_vreq_act, (void *)vreq);

	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
		goto done;

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
done:
	vreq->v_buf->av_back = NULL;
	kmem_cache_free(xdf_vreq_cache, vreq);
}
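
/*
 * Teardown note: the switch in vreq_free() above intentionally falls
 * through from the most-initialized state to the least-initialized one,
 * so a vreq is unwound in the reverse order of the steps performed by
 * vreq_setup() below: unbind the DMA handle, copy back and free the
 * bounce buffer for unaligned transfers, then free the DMA handles.
 */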

/*
 * Initialize the DMA and grant table resources for the buf.
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, (
				    "xdf@%s: get ge_slot failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_gs = gs;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->vreq = vreq;
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		bp->b_private = NULL;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work around CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
				    "handle alloc failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);

			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, (
				    "xdf@%s: DMA mem allocation failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * Get a ge_slot; on failure, gs_get() arranges the restart
		 * callback if it was not set previously.
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_dmaw++;
		rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs);
		ASSERT(rc == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
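
/*
 * DMA window note: when a bind made with DDI_DMA_PARTIAL returns
 * DDI_DMA_PARTIAL_MAP, the transfer spans multiple DMA windows.  Each
 * window is issued as its own ring request using one ge_slot (up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST grant references).  A completed window
 * leaves the vreq in VREQ_DMAWIN_DONE, and the next pass through
 * vreq_setup() advances to the following window via ddi_dma_getwin()
 * and allocates a fresh ge_slot for it.
 */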

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* first, try to allocate the GTEs needed by this slot */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* initialize the ge_slot */
	list_insert_head(&vdp->xdf_gs_act, (void *)gs);
	gs->oeid = vdp->xdf_peer;
	gs->isread = isread;
	gs->ghead = gh;
	gs->ngrefs = 0;

	return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
	int i;
	grant_ref_t *gp = gs->ge;
	int ngrefs = gs->ngrefs;
	boolean_t isread = gs->isread;

	list_remove(&vdp->xdf_gs_act, (void *)gs);

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < ngrefs; i++, gp++)
		gnttab_end_foreign_access(*gp, !isread, 0);
	gnttab_free_grant_references(gs->ghead);

	kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

	ASSERT(gr != -1);
	ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->ge[gs->ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

	return (gr);
}

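/*
 * Grant slot lifecycle: gs_get() reserves a block of grant references for
 * one ring request (and arranges a restart callback if the grant table or
 * the kmem cache is exhausted); gs_grant() claims one reference per data
 * segment and grants the backend (gs->oeid) access to the page at 'mfn',
 * writable when the guest is reading (the backend must fill the page) and
 * read-only when the guest is writing; gs_free() ends the foreign access
 * and returns the remaining references once the request completes.
 */
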
static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in the ring that already have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * Move the bufs back onto the head of the waiting list in order;
	 * vreq_busy is updated in vreq_free().
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		if (vdp->xdf_xdev_iostat != NULL)
			kstat_runq_back_to_waitq(
			    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}

void
xdf_kstat_delete(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t	*kstat;

	/*
	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of our kstat.  xdf_iostat_lk is used
	 * to protect the allocation and freeing of the actual kstat.
	 * xdf_dev_lk can't be used for this purpose because kstat
	 * readers use it to access the contents of the kstat and
	 * hence it can't be held when calling kstat_delete().
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return;
	}

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);

	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}
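
/*
 * Illustrative use of the exported kstat interfaces by a cooperating
 * driver (hypothetical caller; the module name and instance below are
 * examples only):
 *
 *	if (xdf_kstat_create(xdf_dip, "xdf", instance) != 0)
 *		cmn_err(CE_WARN, "I/O kstat creation failed");
 *	...
 *	xdf_kstat_delete(xdf_dip);
 */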

int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* See the comment about locking in xdf_kstat_delete(). */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat != NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	if ((vdp->xdf_xdev_iostat = kstat_create(
	    ks_module, ks_instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
	kstat_install(vdp->xdf_xdev_iostat);
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_iostat_lk);

	return (0);
}

#if defined(XPV_HVM_DRIVER)

typedef struct xdf_hvm_entry {
	list_node_t	xdf_he_list;
	char		*xdf_he_path;
	dev_info_t	*xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;

static xdf_hvm_entry_t *
i_xdf_hvm_find(char *path, dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	ASSERT((path != NULL) || (dip != NULL));
	ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

	i = list_head(&xdf_hvm_list);
	while (i != NULL) {
		if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		if ((dip != NULL) && (i->xdf_he_dip != dip)) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		break;
	}
	return (i);
}

dev_info_t *
xdf_hvm_hold(char *path)
{
	xdf_hvm_entry_t	*i;
	dev_info_t	*dip;

	mutex_enter(&xdf_hvm_list_lock);
	i = i_xdf_hvm_find(path, NULL);
	if (i == NULL) {
		mutex_exit(&xdf_hvm_list_lock);
		return (NULL);
	}
	ndi_hold_devi(dip = i->xdf_he_dip);
	mutex_exit(&xdf_hvm_list_lock);
	return (dip);
}

static void
xdf_hvm_add(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;
	char		*path;

	/* figure out the path for the dip */
	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	i = kmem_alloc(sizeof (*i), KM_SLEEP);
	i->xdf_he_dip = dip;
	i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

	mutex_enter(&xdf_hvm_list_lock);
	ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
	ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
	list_insert_head(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	mutex_enter(&xdf_hvm_list_lock);
	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
	list_remove(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
	kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
	    offsetof(xdf_hvm_entry_t, xdf_he_list));
	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
	ASSERT(list_head(&xdf_hvm_list) == NULL);
	list_destroy(&xdf_hvm_list);
	mutex_destroy(&xdf_hvm_list_lock);
}
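
/*
 * The xdf_hvm_list above maps a /devices path to its xdf dev_info_t so
 * that another driver in the same HVM domain can locate the corresponding
 * PV disk instance.  xdf_hvm_hold() returns the dip with a hold taken via
 * ndi_hold_devi(); the caller is responsible for releasing that hold
 * (with ndi_rele_devi()) when it is done with the device.
 */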

int
xdf_hvm_connect(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	int	rv;

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_connect(vdp, B_TRUE);
	mutex_exit(&vdp->xdf_dev_lk);
	return ((rv == XD_READY) ? 0 : -1);
}

int
xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* sanity check the requested physical geometry */
	mutex_enter(&vdp->xdf_dev_lk);
	if ((geomp->g_secsize != XB_BSIZE) ||
	    (geomp->g_capacity == 0)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	/*
	 * If we've already connected to the backend device then make sure
	 * we're not defining a physical geometry larger than our backend
	 * device.
	 */
	if ((vdp->xdf_xdev_nblocks != 0) &&
	    (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	vdp->xdf_pgeom = *geomp;
	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	return (0);
}

#endif /* XPV_HVM_DRIVER */
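
/*
 * Illustrative (hypothetical) sequence showing how an HVM disk driver
 * might hand a device over to xdf using the interfaces above; the
 * variable names and the source of the geometry are examples only:
 *
 *	dev_info_t	*xdf_dip;
 *	cmlb_geom_t	pgeom;
 *
 *	if ((xdf_dip = xdf_hvm_hold(path)) == NULL)
 *		return (ENODEV);
 *	if (xdf_hvm_connect(xdf_dip) != 0)
 *		return (ENXIO);
 *	... fill in pgeom from the emulated device ...
 *	(void) xdf_hvm_setpgeom(xdf_dip, &pgeom);
 */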