/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/cmlb.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
#endif /* XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <io/xdf.h>

#define	FLUSH_DISKCACHE		0x1
#define	WRITE_BARRIER		0x2
#define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp) \
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp) \
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp) \
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp) \
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

static void *vbd_ss;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
int xdfdebug = 0;
extern int do_polled_io;
diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
int xdf_barrier_flush_disable = 0;

/*
 * dev_ops and cb_ops entrypoints
 */
static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
static int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
static int xdf_reset(dev_info_t *, ddi_reset_cmd_t);
static int xdf_open(dev_t *, int, int, cred_t *);
static int xdf_close(dev_t, int, int, struct cred *);
static int xdf_strategy(struct buf *);
static int xdf_read(dev_t, struct uio *, cred_t *);
static int xdf_aread(dev_t, struct aio_req *, cred_t *);
static int xdf_write(dev_t, struct uio *, cred_t *);
static int xdf_awrite(dev_t, struct aio_req *, cred_t *);
static int xdf_dump(dev_t, caddr_t, daddr_t, int);
static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static uint_t xdf_intr(caddr_t);
static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
	caddr_t, int *);
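
/*
 * Note on lock ordering (comment added for clarity, based on the usage
 * below): whenever both locks are held, xdf_cb_lk (the XenbusState
 * callback lock) is acquired before xdf_dev_lk (which protects the
 * active buf queue and xdf_status).
 */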

/*
 * misc private functions
 */
static int xdf_suspend(dev_info_t *);
static int xdf_resume(dev_info_t *);
static int xdf_start_connect(xdf_t *);
static int xdf_start_disconnect(xdf_t *);
static int xdf_post_connect(xdf_t *);
static void xdf_post_disconnect(xdf_t *);
static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *);
static void xdf_iostart(xdf_t *);
static void xdf_iofini(xdf_t *, uint64_t, int);
static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *);
static int xdf_drain_io(xdf_t *);
static boolean_t xdf_isopen(xdf_t *, int);
static int xdf_check_state_transition(xdf_t *, XenbusState);
static int xdf_connect(xdf_t *, boolean_t);
static int xdf_dmacallback(caddr_t);
static void xdf_timeout_handler(void *);
static uint_t xdf_iorestart(caddr_t);
static v_req_t *vreq_get(xdf_t *, buf_t *);
static void vreq_free(xdf_t *, v_req_t *);
static int vreq_setup(xdf_t *, v_req_t *);
static ge_slot_t *gs_get(xdf_t *, int);
static void gs_free(xdf_t *, ge_slot_t *);
static grant_ref_t gs_grant(ge_slot_t *, mfn_t);
static void unexpectedie(xdf_t *);
static void xdfmin(struct buf *);
static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);

#if defined(XPV_HVM_DRIVER)
static void xdf_hvm_add(dev_info_t *);
static void xdf_hvm_rm(dev_info_t *);
static void xdf_hvm_init(void);
static void xdf_hvm_fini(void);
#endif /* XPV_HVM_DRIVER */

static struct cb_ops xdf_cbops = {
	xdf_open,		/* cb_open */
	xdf_close,		/* cb_close */
	xdf_strategy,		/* cb_strategy */
	nodev,			/* cb_print */
	xdf_dump,		/* cb_dump */
	xdf_read,		/* cb_read */
	xdf_write,		/* cb_write */
	xdf_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	xdf_prop_op,		/* cb_prop_op */
	NULL,			/* cb_str */
	D_MP | D_NEW | D_64BIT,	/* cb_flag */
	CB_REV,			/* cb_rev */
	xdf_aread,		/* cb_aread */
	xdf_awrite		/* cb_awrite */
};

struct dev_ops xdf_devops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xdf_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdf_attach,		/* devo_attach */
	xdf_detach,		/* devo_detach */
	xdf_reset,		/* devo_reset */
	&xdf_cbops,		/* devo_cb_ops */
	(struct bus_ops *)NULL	/* devo_bus_ops */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"virtual block driver",	/* short description */
	&xdf_devops		/* driver specific ops */
};

static struct modlinkage xdf_modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length */
	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
	XB_BSIZE,			/* granularity */
	0,				/* flags (reserved) */
};

static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/* callbacks from common label */

int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};

int
_init(void)
{
	int rc;

	if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0)
		return (rc);

	xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
	    sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
	    sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_init();
#endif /* XPV_HVM_DRIVER */

	if ((rc = mod_install(&xdf_modlinkage)) != 0) {
#if defined(XPV_HVM_DRIVER)
		xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */
		kmem_cache_destroy(xdf_vreq_cache);
		kmem_cache_destroy(xdf_gs_cache);
		ddi_soft_state_fini(&vbd_ss);
		return (rc);
	}

	return (rc);
}

int
_fini(void)
{

	int err;
	if ((err = mod_remove(&xdf_modlinkage)) != 0)
		return (err);

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */

	kmem_cache_destroy(xdf_vreq_cache);
	kmem_cache_destroy(xdf_gs_cache);
	ddi_soft_state_fini(&vbd_ss);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdf_modlinkage, modinfop));
}

/*ARGSUSED*/
static int
xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
{
	int instance;
	xdf_t *vbdp;

	instance = XDF_INST(getminor((dev_t)arg));

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) {
			*rp = NULL;
			return (DDI_FAILURE);
		}
		*rp = vbdp->xdf_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*rp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(dip))) == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(vdp->xdf_vd_lbl,
	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
	    XDF_PART(getminor(dev)), NULL));
}

static int
xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	xdf_t *vdp;
	ddi_iblock_cookie_t softibc;
	int instance;

	xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
	    "xdfdebug", 0);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (xdf_resume(devi));

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);
	ddi_set_driver_private(devi, vdp);
	vdp->xdf_dip = devi;
	cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);

	if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
	mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
	mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
	mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER,
	    (void *)vdp->xdf_ibc);

	if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
	if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
	    &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

#if !defined(XPV_HVM_DRIVER)
	/* create kstat for iostat(1M) */
	if (xdf_kstat_create(devi, "xdf", instance) != 0) {
		cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
#endif /* !XPV_HVM_DRIVER */

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

	/*
	 * Initialize the physical geometry structure.  Note that currently
	 * we don't know the size of the backend device so the number
	 * of blocks on the device will be initialized to zero.  Once
	 * we connect to the backend device we'll update the physical
	 * geometry to reflect the real size of the device.
	 */
	xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/*
	 * create default device minor nodes: non-removable disk
	 * we will adjust minor nodes after we are connected w/ backend
	 */
	cmlb_alloc_handle(&vdp->xdf_vd_lbl);
	if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1,
	    DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
	    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
	    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
	    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
	    vdp->xdf_vd_lbl, NULL) != 0) {
		cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

	/*
	 * We ship with cache-enabled disks
	 */
	vdp->xdf_wce = 1;

	mutex_enter(&vdp->xdf_cb_lk);

	/* Watch backend XenbusState change */
	if (xvdi_add_event_handler(devi, XS_OE_STATE,
	    xdf_oe_change) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout0;
	}

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: start connection failed",
		    ddi_get_name_addr(devi));
		(void) xdf_start_disconnect(vdp);
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout1;
	}

	mutex_exit(&vdp->xdf_cb_lk);

	list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
	    offsetof(v_req_t, v_link));
	list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t),
	    offsetof(ge_slot_t, link));

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_add(devi);

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1);

	/*
	 * Report our version to dom0.
	 */
	if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d",
	    HVMPV_XDF_VERS))
		cmn_err(CE_WARN, "xdf: couldn't write version\n");
#endif /* XPV_HVM_DRIVER */

	ddi_report_dev(devi);

	DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance));

	return (DDI_SUCCESS);

errout1:
	xvdi_remove_event_handler(devi, XS_OE_STATE);
errout0:
	if (vdp->xdf_vd_lbl != NULL) {
		cmlb_detach(vdp->xdf_vd_lbl, NULL);
		cmlb_free_handle(&vdp->xdf_vd_lbl);
		vdp->xdf_vd_lbl = NULL;
	}
#if !defined(XPV_HVM_DRIVER)
	xdf_kstat_delete(devi);
#endif /* !XPV_HVM_DRIVER */
	if (vdp->xdf_softintr_id != NULL)
		ddi_remove_softintr(vdp->xdf_softintr_id);
	if (vdp->xdf_ibc != NULL) {
		mutex_destroy(&vdp->xdf_cb_lk);
		mutex_destroy(&vdp->xdf_dev_lk);
	}
	cv_destroy(&vdp->xdf_dev_cv);
	ddi_soft_state_free(vbd_ss, instance);
	ddi_set_driver_private(devi, NULL);
	ddi_prop_remove_all(devi);
	cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi));
	return (DDI_FAILURE);
}

static int
xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	switch (cmd) {

	case DDI_PM_SUSPEND:
		break;

	case DDI_SUSPEND:
		return (xdf_suspend(devi));

	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);

	if (vdp == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_isopen(vdp, -1)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

	if (vdp->xdf_status != XD_CLOSED) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_rm(devi);
#endif /* XPV_HVM_DRIVER */

	ASSERT(!ISDMACBON(vdp));
	mutex_exit(&vdp->xdf_dev_lk);

	if (vdp->xdf_timeout_id != 0)
		(void) untimeout(vdp->xdf_timeout_id);

	xvdi_remove_event_handler(devi, XS_OE_STATE);

	/* we'll support backend running in domU later */
#ifdef DOMU_BACKEND
	(void) xvdi_post_event(devi, XEN_HP_REMOVE);
#endif

	list_destroy(&vdp->xdf_vreq_act);
	list_destroy(&vdp->xdf_gs_act);
	ddi_prop_remove_all(devi);
	xdf_kstat_delete(devi);
	ddi_remove_softintr(vdp->xdf_softintr_id);
	ddi_set_driver_private(devi, NULL);
	cv_destroy(&vdp->xdf_dev_cv);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
	if (vdp->xdf_cache_flush_block != NULL)
		kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
	ddi_soft_state_free(vbd_ss, instance);
	return (DDI_SUCCESS);
}

static int
xdf_suspend(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;
	enum xdf_state st;

	instance = ddi_get_instance(devi);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: xdf#%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	xvdi_suspend(devi);

	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	st = vdp->xdf_status;
	/* change status to stop further I/O requests */
	if (st == XD_READY)
		vdp->xdf_status = XD_SUSPEND;
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	/* make sure no more I/O responses left in the ring buffer */
	if ((st == XD_INIT) || (st == XD_READY)) {
#ifdef XPV_HVM_DRIVER
		ec_unbind_evtchn(vdp->xdf_evtchn);
		xvdi_free_evtchn(devi);
#else /* !XPV_HVM_DRIVER */
		(void) ddi_remove_intr(devi, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
		(void) xdf_drain_io(vdp);
		/*
		 * no need to teardown the ring buffer here
		 * it will be simply re-init'ed during resume when
		 * we call xvdi_alloc_ring
		 */
	}

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: SUCCESS\n");

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_resume(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: xdf%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_cb_lk);

	if (xvdi_resume(devi) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(vdp->xdf_status != XD_READY);
	vdp->xdf_status = XD_UNKNOWN;
	mutex_exit(&vdp->xdf_dev_lk);

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_exit(&vdp->xdf_cb_lk);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: done\n");
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance));
	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	/*
	 * wait for any outstanding I/O to complete
	 */
	(void) xdf_drain_io(vdp);

	DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance));
	return (DDI_SUCCESS);
}

static int
xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t minor;
	xdf_t *vdp;
	int part;
	ulong_t parbit;
	diskaddr_t p_blkct = 0;
	boolean_t firstopen;
	boolean_t nodelay;

	minor = getminor(*devp);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	nodelay = (flag & (FNDELAY | FNONBLOCK));

	DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor)));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}

	if ((flag & FWRITE) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EROFS);
	}

	part = XDF_PART(minor);
	parbit = 1 << part;
	if ((vdp->xdf_vd_exclopen & parbit) ||
	    ((flag & FEXCL) && xdf_isopen(vdp, part))) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EBUSY);
	}

	/* are we the first one to open this node? */
	firstopen = !xdf_isopen(vdp, -1);

	if (otyp == OTYP_LYR)
		vdp->xdf_vd_lyropen[part]++;

	vdp->xdf_vd_open[otyp] |= parbit;

	if (flag & FEXCL)
		vdp->xdf_vd_exclopen |= parbit;

	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	if (firstopen)
		cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	/*
	 * check size
	 * ignore CD/DVD which contains a zero-sized s0
	 */
	if (!nodelay && !XD_IS_CD(vdp) &&
	    ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
	    NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) {
		(void) xdf_close(*devp, flag, otyp, credp);
		return (ENXIO);
	}

	return (0);
}

/*ARGSUSED*/
static int
xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t minor;
	xdf_t *vdp;
	int part;
	ulong_t parbit;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}
	parbit = 1 << part;

	ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
	if (otyp == OTYP_LYR) {
		ASSERT(vdp->xdf_vd_lyropen[part] > 0);
		if (--vdp->xdf_vd_lyropen[part] == 0)
			vdp->xdf_vd_open[otyp] &= ~parbit;
	} else {
		vdp->xdf_vd_open[otyp] &= ~parbit;
	}
	vdp->xdf_vd_exclopen &= ~parbit;

	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_strategy(struct buf *bp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkct, p_blkst;
	ulong_t nblks;
	int part;

	minor = getminor(bp->b_edev);
	part = XDF_PART(minor);

	vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor));
	if ((vdp == NULL) || !xdf_isopen(vdp, part)) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Check for writes to a read only device */
	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
		bioerror(bp, EROFS);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Check if this I/O is accessing a partition or the entire disk */
	if ((long)bp->b_private == XB_SLICE_NONE) {
		/* This I/O is using an absolute offset */
		p_blkct = vdp->xdf_xdev_nblocks;
		p_blkst = 0;
	} else {
		/* This I/O is using a partition relative offset */
		if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
		    &p_blkst, NULL, NULL, NULL)) {
			bioerror(bp, ENXIO);
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (0);
		}
	}

	/* check for a starting block beyond the disk or partition limit */
	if (bp->b_blkno > p_blkct) {
		DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64,
		    (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
		bioerror(bp, EINVAL);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Legacy: don't set error flag in this case */
	if (bp->b_blkno == p_blkct) {
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Adjust for partial transfer */
	nblks = bp->b_bcount >> XB_BSHIFT;
	if ((bp->b_blkno + nblks) > p_blkct) {
		bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
		bp->b_bcount -= bp->b_resid;
	}

	DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n",
	    (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));

	/* Fix up the buf struct */
	bp->b_flags |= B_BUSY;
	bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */
	bp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);
	if (do_polled_io)
		(void) xdf_drain_io(vdp);
	return (0);
}

/*ARGSUSED*/
static int
xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{

	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
}

/*ARGSUSED*/
static int
xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop));
}

/*ARGSUSED*/
static int
xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop));
}

/*ARGSUSED*/
static int
xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop));
}

static int
xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	struct buf dumpbuf, *dbp;
	xdf_t *vdp;
	minor_t minor;
	int err = 0;
	int part;
	diskaddr_t p_blkcnt, p_blkst;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n",
	    addr, blkno, nblk));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
	    NULL, NULL, NULL))
		return (ENXIO);

	if ((blkno + nblk) > p_blkcnt) {
		cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64,
		    blkno + nblk, (uint64_t)p_blkcnt);
		return (EINVAL);
	}

	dbp = &dumpbuf;
	bioinit(dbp);
	dbp->b_flags = B_BUSY;
	dbp->b_un.b_addr = addr;
	dbp->b_bcount = nblk << DEV_BSHIFT;
	dbp->b_blkno = blkno;
	dbp->b_edev = dev;
	dbp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = dbp;
	} else {
		vdp->xdf_l_act->av_forw = dbp;
		vdp->xdf_l_act = dbp;
	}
	dbp->av_forw = NULL;
	dbp->av_back = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = xdf_drain_io(vdp);
	biofini(dbp);
	return (err);
}

/*ARGSUSED*/
static int
xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int instance;
	xdf_t *vdp;
	minor_t minor;
	int part;

	minor = getminor(dev);
	instance = XDF_INST(minor);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (ENXIO);

	DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n",
	    instance, cmd, cmd));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo media_info;

		media_info.dki_lbsize = DEV_BSIZE;
		media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
		media_info.dki_media_type = DK_FIXED_DISK;

		if (ddi_copyout(&media_info, (void *)arg,
		    sizeof (struct dk_minfo), mode)) {
			return (EFAULT);
		} else {
			return (0);
		}
	}

	case DKIOCINFO: {
		struct dk_cinfo info;

		/* controller information */
		if (XD_IS_CD(vdp))
			info.dki_ctype = DKC_CDROM;
		else
			info.dki_ctype = DKC_VBD;

		info.dki_cnum = 0;
		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);

		/* unit information */
		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
		info.dki_flags = DKI_FMTVOL;
		info.dki_partition = part;
		info.dki_maxtransfer = maxphys / DEV_BSIZE;
		info.dki_addr = 0;
		info.dki_space = 0;
		info.dki_prio = 0;
		info.dki_vec = 0;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
			return (EFAULT);
		else
			return (0);
	}

	case DKIOCSTATE: {
		enum dkio_state dkstate = DKIO_INSERTED;
		if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate),
		    mode) != 0)
			return (EFAULT);
		return (0);
	}

	/*
	 * is media removable?
	 */
	case DKIOCREMOVABLE: {
		int i = XD_IS_RM(vdp) ? 1 : 0;
		if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode))
			return (EFAULT);
		return (0);
	}

	case DKIOCG_PHYGEOM:
	case DKIOCG_VIRTGEOM:
	case DKIOCGGEOM:
	case DKIOCSGEOM:
	case DKIOCGAPART:
	case DKIOCSAPART:
	case DKIOCGVTOC:
	case DKIOCSVTOC:
	case DKIOCPARTINFO:
	case DKIOCGMBOOT:
	case DKIOCSMBOOT:
	case DKIOCGETEFI:
	case DKIOCSETEFI:
	case DKIOCPARTITION: {
		int rc;

		rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
		    rvalp, NULL);
		return (rc);
	}

	case DKIOCGETWCE:
		if (ddi_copyout(&vdp->xdf_wce, (void *)arg,
		    sizeof (vdp->xdf_wce), mode))
			return (EFAULT);
		return (0);
	case DKIOCSETWCE:
		if (ddi_copyin((void *)arg, &vdp->xdf_wce,
		    sizeof (vdp->xdf_wce), mode))
			return (EFAULT);
		return (0);
	case DKIOCFLUSHWRITECACHE: {
		int rc;
		struct dk_callback *dkc = (struct dk_callback *)arg;

		if (vdp->xdf_flush_supported) {
			rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    NULL, 0, 0, (void *)dev);
		} else if (vdp->xdf_feature_barrier &&
		    !xdf_barrier_flush_disable) {
			rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    vdp->xdf_cache_flush_block, xdf_flush_block,
			    DEV_BSIZE, (void *)dev);
		} else {
			return (ENOTTY);
		}
		if ((mode & FKIOCTL) && (dkc != NULL) &&
		    (dkc->dkc_callback != NULL)) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, rc);
			/* need to return 0 after calling callback */
			rc = 0;
		}
		return (rc);
	}

	default:
		return (ENOTTY);
	}
}

/*
 * xdf interrupt handler
 */
static uint_t
xdf_intr(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	xendev_ring_t *xbr;
	blkif_response_t *resp;
	int bioerr;
	uint64_t id;
	extern int do_polled_io;
	uint8_t op;
	uint16_t status;
	ddi_acc_handle_t acchdl;

	mutex_enter(&vdp->xdf_dev_lk);

	if ((xbr = vdp->xdf_xb_ring) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_INTR_UNCLAIMED);
	}

	acchdl = vdp->xdf_xb_ring_hdl;

	/*
	 * complete all requests which have a response
	 */
	while (resp = xvdi_ring_get_response(xbr)) {
		id = ddi_get64(acchdl, &resp->id);
		op = ddi_get8(acchdl, &resp->operation);
		status = ddi_get16(acchdl, (uint16_t *)&resp->status);
		DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
		    op, id, status));

		/*
		 * XXPV - close connection to the backend and restart
		 */
		if (status != BLKIF_RSP_OKAY) {
			DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
			    ddi_get_name_addr(vdp->xdf_dip),
			    (op == BLKIF_OP_READ) ? "reading" : "writing"));
			bioerr = EIO;
		} else {
			bioerr = 0;
		}

		xdf_iofini(vdp, id, bioerr);
	}

	mutex_exit(&vdp->xdf_dev_lk);

	if (!do_polled_io)
		xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

int xdf_fbrewrites;	/* how many times was our flush block rewritten */

/*
 * Snarf new data if our flush block was re-written
 */
static void
check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
{
	int nblks;
	boolean_t mapin;

	if (IS_WRITE_BARRIER(vdp, bp))
		return; /* write was a flush write */

	mapin = B_FALSE;
	nblks = bp->b_bcount >> DEV_BSHIFT;
	if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
		xdf_fbrewrites++;
		if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
			mapin = B_TRUE;
			bp_mapin(bp);
		}
		bcopy(bp->b_un.b_addr +
		    ((xdf_flush_block - blkno) << DEV_BSHIFT),
		    vdp->xdf_cache_flush_block, DEV_BSIZE);
		if (mapin)
			bp_mapout(bp);
	}
}

static void
xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr)
{
	ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
	v_req_t *vreq = gs->vreq;
	buf_t *bp = vreq->v_buf;

	gs_free(vdp, gs);
	if (bioerr)
		bioerror(bp, bioerr);
	vreq->v_nslots--;
	if (vreq->v_nslots != 0)
		return;

	XDF_UPDATE_IO_STAT(vdp, bp);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));

	if (IS_ERROR(bp))
		bp->b_resid = bp->b_bcount;

	vreq_free(vdp, vreq);
	biodone(bp);
}

/*
 * return value of xdf_prepare_rreq()
 * used in xdf_iostart()
 */
#define	XF_PARTIAL	0 /* rreq is full, not all I/O in buf transferred */
#define	XF_COMP		1 /* no more I/O left in buf */

static void
xdf_iostart(xdf_t *vdp)
{
	xendev_ring_t *xbr;
	struct buf *bp;
	blkif_request_t *rreq;
	int retval;
	int rreqready = 0;

	xbr = vdp->xdf_xb_ring;

	/*
	 * populate the ring request(s)
	 *
	 * loop until there is no buf to transfer or no free slot
	 * available in I/O ring
	 */
	mutex_enter(&vdp->xdf_dev_lk);

	for (;;) {
		if (vdp->xdf_status != XD_READY)
			break;

		/* active buf queue empty? */
		if ((bp = vdp->xdf_f_act) == NULL)
			break;

		/* try to grab a vreq for this bp */
		if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL))
			break;
		/* alloc DMA/GTE resources */
		if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS)
			break;

		/* get next blkif_request in the ring */
		if ((rreq = xvdi_ring_get_request(xbr)) == NULL)
			break;
		bzero(rreq, sizeof (blkif_request_t));

		/* populate blkif_request with this buf */
		rreqready++;
		retval = xdf_prepare_rreq(vdp, bp, rreq);
		if (retval == XF_COMP) {
			/* finish this bp, switch to next one */
			if (vdp->xdf_xdev_iostat != NULL)
				kstat_waitq_to_runq(
				    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
			vdp->xdf_f_act = bp->av_forw;
			bp->av_forw = NULL;
		}
	}

	/*
	 * Send the request(s) to the backend
	 */
	if (rreqready) {
		if (xvdi_ring_push_request(xbr)) {
			DPRINTF(IO_DBG, ("xdf_iostart: "
			    "sent request(s) to backend\n"));
			xvdi_notify_oe(vdp->xdf_dip);
		}
	}

	mutex_exit(&vdp->xdf_dev_lk);
}

/*
 * populate a single blkif_request_t w/ a buf
 */
static int
xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
	int rval;
	grant_ref_t gr;
	uint8_t fsect, lsect;
	size_t bcnt;
	paddr_t dma_addr;
	off_t blk_off;
	dev_info_t *dip = vdp->xdf_dip;
	blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
	v_req_t *vreq = BP2VREQ(bp);
	uint64_t blkno = vreq->v_blkno;
	uint_t ndmacs = vreq->v_ndmacs;
	ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
	int seg = 0;
	int isread = IS_READ(bp);

	if (isread)
		ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
	else {
		switch (vreq->v_flush_diskcache) {
		case FLUSH_DISKCACHE:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_FLUSH_DISKCACHE);
			ddi_put16(acchdl, &rreq->handle, vdev);
			ddi_put64(acchdl, &rreq->id,
			    (uint64_t)(uintptr_t)(vreq->v_gs));
			ddi_put8(acchdl, &rreq->nr_segments, 0);
			return (XF_COMP);
		case WRITE_BARRIER:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_WRITE_BARRIER);
			break;
		default:
			if (!vdp->xdf_wce)
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE_BARRIER);
			else
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE);
			break;
		}
	}

	ddi_put16(acchdl, &rreq->handle, vdev);
	ddi_put64(acchdl, &rreq->sector_number, blkno);
	ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs));

	/*
	 * loop until all segments are populated or no more dma cookie in buf
	 */
	for (;;) {
		/*
		 * Each segment of a blkif request can transfer up to
		 * one 4K page of data.
		 */
		bcnt = vreq->v_dmac.dmac_size;
		ASSERT(bcnt <= PAGESIZE);
		ASSERT((bcnt % XB_BSIZE) == 0);
		dma_addr = vreq->v_dmac.dmac_laddress;
		blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
		ASSERT((blk_off & XB_BMASK) == 0);
		fsect = blk_off >> XB_BSHIFT;
		lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
		ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
		    lsect < XB_MAX_SEGLEN / XB_BSIZE);
		DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n",
		    seg, vreq->v_dmac.dmac_size, blk_off));
		gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT);
		ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
		ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
		ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
		DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64
		    "\n", seg, fsect, lsect, gr, dma_addr));

		blkno += (bcnt >> XB_BSHIFT);
		seg++;
		ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		if (--ndmacs) {
			ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
			continue;
		}

		vreq->v_status = VREQ_DMAWIN_DONE;
		vreq->v_blkno = blkno;
		if (vreq->v_dmaw + 1 == vreq->v_ndmaws)
			/* last win */
			rval = XF_COMP;
		else
			rval = XF_PARTIAL;
		break;
	}
	ddi_put8(acchdl, &rreq->nr_segments, seg);
	DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n",
	    rreq->id));

	return (rval);
}

#define	XDF_QSEC	50000	/* .005 second */
#define	XDF_POLLCNT	12	/* loop for 12 times before time out */

static int
xdf_drain_io(xdf_t *vdp)
{
	int pollc, rval;
	xendev_ring_t *xbr;

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_drain_io: start\n");

	mutex_enter(&vdp->xdf_dev_lk);

	if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND))
		goto out;

	rval = 0;
	xbr = vdp->xdf_xb_ring;
	ASSERT(xbr != NULL);

	for (pollc = 0; pollc < XDF_POLLCNT; pollc++) {
		if (xvdi_ring_has_unconsumed_responses(xbr)) {
			mutex_exit(&vdp->xdf_dev_lk);
			(void) xdf_intr((caddr_t)vdp);
			mutex_enter(&vdp->xdf_dev_lk);
		}
		if (!xvdi_ring_has_incomp_request(xbr))
			goto out;

#ifndef	XPV_HVM_DRIVER
		(void) HYPERVISOR_yield();
#endif /* XPV_HVM_DRIVER */
		/*
		 * file-backed devices can be slow
		 */
		drv_usecwait(XDF_QSEC << pollc);
	}
	cmn_err(CE_WARN, "xdf_polled_io: timeout");
	rval = EIO;
out:
	mutex_exit(&vdp->xdf_dev_lk);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_drain_io: end, err=%d\n", rval);
	return (rval);
}

/* ARGSUSED5 */
int
xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp,
    diskaddr_t start, size_t reqlen, void *tg_cookie)
{
	xdf_t *vdp;
	struct buf *bp;
	int err = 0;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
	if (vdp == NULL)
		return (ENXIO);

	if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
		return (EINVAL);

	bp = getrbuf(KM_SLEEP);
	if (cmd == TG_READ)
		bp->b_flags = B_BUSY | B_READ;
	else
		bp->b_flags = B_BUSY | B_WRITE;
	bp->b_un.b_addr = bufp;
	bp->b_bcount = reqlen;
	bp->b_blkno = start;
	bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = biowait(bp);

	ASSERT(bp->b_flags & B_DONE);

	freerbuf(bp);
	return (err);
}

/*
 * synthetic geometry
 */
#define	XDF_NSECTS	256
#define	XDF_NHEADS	16

static void
xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	xdf_t *vdp;
	uint_t ncyl;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));

	ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);

	geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
	geomp->g_acyl = 0;
	geomp->g_nhead = XDF_NHEADS;
	geomp->g_secsize = XB_BSIZE;
	geomp->g_nsect = XDF_NSECTS;
	geomp->g_intrlv = 0;
	geomp->g_rpm = 7200;
	geomp->g_capacity = vdp->xdf_xdev_nblocks;
}

static int
xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp)
{
	xdf_t *vdp;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));

	if (vdp == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	*capp = vdp->xdf_pgeom.g_capacity;
	DPRINTF(LBL_DBG, ("capacity %llu\n", *capp));
	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL)
		return (ENXIO);
	*geomp = vdp->xdf_pgeom;
	return (0);
}

/*
 * No real HBA, no geometry available from it
 */
/*ARGSUSED*/
static int
xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	return (EINVAL);
}

static int
xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep)
{
	xdf_t *vdp;

	if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))))
		return (ENXIO);

	if (XD_IS_RO(vdp))
		tgattributep->media_is_writable = 0;
	else
		tgattributep->media_is_writable = 1;
	return (0);
}

/* ARGSUSED3 */
int
xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
{
	switch (cmd) {
	case TG_GETPHYGEOM:
		return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg));
	case TG_GETVIRTGEOM:
		return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg));
	case TG_GETCAPACITY:
		return (xdf_lb_getcap(devi, (diskaddr_t *)arg));
	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = XB_BSIZE;
		return (0);
	case TG_GETATTR:
		return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg));
	default:
		return (ENOTTY);
	}
}

/*
 * Kick-off connect process
 * Status should be XD_UNKNOWN or XD_CLOSED
 * On success, status will be changed to XD_INIT
 * On error, status won't be changed
 */
static int
xdf_start_connect(xdf_t *vdp)
{
	char *xsnode;
	grant_ref_t gref;
	xenbus_transaction_t xbt;
	int rv;
	dev_info_t *dip = vdp->xdf_dip;

	if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1)
		goto errout;

	if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel",
		    ddi_get_name_addr(dip));
		goto errout;
	}
	vdp->xdf_evtchn = xvdi_get_evtchn(dip);
#ifdef XPV_HVM_DRIVER
	ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
#else /* !XPV_HVM_DRIVER */
	if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: "
		    "failed to add intr handler", ddi_get_name_addr(dip));
		goto errout1;
	}
#endif /* !XPV_HVM_DRIVER */

	if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
	    sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
		    ddi_get_name_addr(dip));
		goto errout2;
	}
	vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */

	/*
	 * Write into xenstore the info needed by backend
	 */
	if ((xsnode = xvdi_get_xsname(dip)) == NULL) {
		cmn_err(CE_WARN, "xdf@%s: "
		    "failed to get xenstore node path",
		    ddi_get_name_addr(dip));
		goto fail_trans;
	}
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, EIO, "transaction start");
		goto fail_trans;
	}

	if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing ring-ref");
		goto abort_trans;
	}

	if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u",
	    vdp->xdf_evtchn)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write event-channel",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing event-channel");
		goto abort_trans;
	}

	/*
	 * "protocol" is written by the domain builder in the case of PV
	 * domains. However, it is not written for HVM domains, so let's
	 * write it here.
	 */
	if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s",
	    XEN_IO_PROTO_ABI_NATIVE)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write protocol",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing protocol");
		goto abort_trans;
	}

	if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) {
		cmn_err(CE_WARN, "xdf@%s: "
		    "failed to switch state to XenbusStateInitialised",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing state");
		goto abort_trans;
	}

	/* kick-off connect process */
	if (rv = xenbus_transaction_end(xbt, 0)) {
		if (rv == EAGAIN)
			goto trans_retry;
		cmn_err(CE_WARN, "xdf@%s: failed to end transaction",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "completing transaction");
		goto fail_trans;
	}

	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_INIT;
	mutex_exit(&vdp->xdf_dev_lk);

	return (DDI_SUCCESS);

abort_trans:
	(void) xenbus_transaction_end(xbt, 1);
fail_trans:
	xvdi_free_ring(vdp->xdf_xb_ring);
errout2:
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
errout1:
	xvdi_free_evtchn(dip);
errout:
	cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting",
	    ddi_get_name_addr(dip));
	return (DDI_FAILURE);
}

/*
 * Kick-off disconnect process
 * Status won't be changed
 */
static int
xdf_start_disconnect(xdf_t *vdp)
{
	if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) {
		cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting",
		    ddi_get_name_addr(vdp->xdf_dip));
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
xdf_get_flush_block(xdf_t *vdp)
{
	/*
	 * Get a DEV_BSIZE aligned buffer
	 */
	vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP);
	vdp->xdf_cache_flush_block =
	    (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE);
	if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
	    xdf_flush_block, DEV_BSIZE, NULL) != 0)
		return (DDI_FAILURE);
	return (DDI_SUCCESS);
}

/*
 * Finish other initialization after we've connected to backend
 * Status should be XD_INIT before calling this routine
 * On success, status should be changed to XD_READY
 * On error, status should stay XD_INIT
 */
static int
xdf_post_connect(xdf_t *vdp)
{
	int rv;
	uint_t len;
	char *type;
	char *barrier;
	dev_info_t *devi = vdp->xdf_dip;

	/*
	 * Determine if feature barrier is supported by backend
	 */
	if (xenbus_read(XBT_NULL, xvdi_get_oename(devi),
	    "feature-barrier", (void **)&barrier, &len) == 0) {
		vdp->xdf_feature_barrier = 1;
		kmem_free(barrier, len);
	} else {
		cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier",
		    ddi_get_name_addr(vdp->xdf_dip));
		vdp->xdf_feature_barrier = 0;
	}

	/* probe backend */
	if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi),
	    "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks,
	    "info", "%u", &vdp->xdf_xdev_info, NULL)) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "cannot read backend info", ddi_get_name_addr(devi));
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the device we're connecting isn't smaller than
	 * the old connected device.
	 */
	if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "backend disk device shrank", ddi_get_name_addr(devi));
		/* XXX: call xvdi_fatal_error() here? */
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Only update the physical geometry to reflect the new device
	 * size if this is the first time we're connecting to the backend
	 * device.  Once we assign a physical geometry to a device it stays
	 * fixed until:
	 *	- we get detached and re-attached (at which point we
	 *	  automatically assign a new physical geometry).
	 *	- someone calls TG_SETPHYGEOM to explicitly set the
	 *	  physical geometry.
	 */
	if (vdp->xdf_pgeom.g_capacity == 0)
		xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/* fix disk type */
	if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type",
	    (void **)&type, &len) != 0) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "cannot read device-type", ddi_get_name_addr(devi));
		xvdi_fatal_error(devi, rv, "reading device-type");
		return (DDI_FAILURE);
	}
	if (strcmp(type, "cdrom") == 0)
		vdp->xdf_xdev_info |= VDISK_CDROM;
	kmem_free(type, len);

	/*
	 * We've created all the minor nodes via cmlb_attach() using default
	 * value in xdf_attach() to make it possible to block in xdf_open(),
	 * in case there's anyone (say, booting thread) ever trying to open
	 * it before connected to backend.  We will refresh all those minor
	 * nodes w/ latest info we've got now when we are almost connected.
	 *
	 * Don't do this when xdf is already opened by someone (could happen
	 * during resume), because cmlb_attach() will invalidate the label
	 * info and confuse those who have already opened the node, which is
	 * bad.
	 */
	if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) {
		/* re-init cmlb w/ latest info we got from backend */
		if (cmlb_attach(devi, &xdf_lb_ops,
		    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
		    XD_IS_RM(vdp), 1,
		    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
		    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
		    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
		    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
		    vdp->xdf_vd_lbl, NULL) != 0) {
			cmn_err(CE_WARN, "xdf@%s: cmlb attach failed",
			    ddi_get_name_addr(devi));
			return (DDI_FAILURE);
		}
	}

	/* mark vbd is ready for I/O */
	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_READY;
	mutex_exit(&vdp->xdf_dev_lk);
	/*
	 * If backend has feature-barrier, see if it supports disk
	 * cache flush op.
	 */
	vdp->xdf_flush_supported = 0;
	if (vdp->xdf_feature_barrier) {
		/*
		 * Pretend we already know flush is supported so probe
		 * will attempt the correct op.
		 */
		vdp->xdf_flush_supported = 1;
		if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
			vdp->xdf_flush_supported = 1;
		} else {
			vdp->xdf_flush_supported = 0;
			/*
			 * If the other end does not support the cache flush op
			 * then we must use a barrier-write to force disk
			 * cache flushing.  Barrier writes require that a data
			 * block actually be written.
			 * Cache a block to barrier-write when we are
			 * asked to perform a flush.
			 * XXX - would it be better to just copy 1 block
			 * (512 bytes) from whatever write we did last
			 * and rewrite that block?
			 */
			if (xdf_get_flush_block(vdp) != DDI_SUCCESS)
				return (DDI_FAILURE);
		}
	}

	cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi),
	    (uint64_t)vdp->xdf_xdev_nblocks);

	return (DDI_SUCCESS);
}

/*
 * Finish other uninitialization after we've disconnected from backend
 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED
 */
static void
xdf_post_disconnect(xdf_t *vdp)
{
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
	xvdi_free_evtchn(vdp->xdf_dip);
	xvdi_free_ring(vdp->xdf_xb_ring);
	vdp->xdf_xb_ring = NULL;
	vdp->xdf_xb_ring_hdl = NULL;
	vdp->xdf_peer = (domid_t)-1;

	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_CLOSED;
	mutex_exit(&vdp->xdf_dev_lk);
}

/*ARGSUSED*/
static void
xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
	boolean_t unexpect_die = B_FALSE;
	int status;

	DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
	    ddi_get_name_addr(dip), new_state));

	mutex_enter(&vdp->xdf_cb_lk);

	if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) {
		mutex_exit(&vdp->xdf_cb_lk);
		return;
	}

	switch (new_state) {
	case XenbusStateInitialising:
		ASSERT(vdp->xdf_status == XD_CLOSED);
		/*
		 * backend recovered from a previous failure,
		 * kick-off connect process again
		 */
		if (xdf_start_connect(vdp) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "xdf@%s:"
			    " failed to start reconnecting to backend",
			    ddi_get_name_addr(dip));
		}
		break;
	case XenbusStateConnected:
		ASSERT(vdp->xdf_status == XD_INIT);
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
		/* finish final init after connect */
		if (xdf_post_connect(vdp) != DDI_SUCCESS)
			(void) xdf_start_disconnect(vdp);
		break;
	case XenbusStateClosing:
		if (vdp->xdf_status == XD_READY) {
			mutex_enter(&vdp->xdf_dev_lk);
			if (xdf_isopen(vdp, -1)) {
				cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, "
				    "still in use", ddi_get_name_addr(dip));
				mutex_exit(&vdp->xdf_dev_lk);
				break;
			} else {
				vdp->xdf_status = XD_CLOSING;
			}
			mutex_exit(&vdp->xdf_dev_lk);
		}
		(void) xdf_start_disconnect(vdp);
		break;
	case XenbusStateClosed:
		/* first check if BE closed unexpectedly */
		mutex_enter(&vdp->xdf_dev_lk);
		if (xdf_isopen(vdp, -1)) {
			unexpect_die = B_TRUE;
			unexpectedie(vdp);
			cmn_err(CE_WARN, "xdf@%s: backend closed, "
"xdf@%s: backend closed, " 2067 "reconnecting...", ddi_get_name_addr(dip)); 2068 } 2069 mutex_exit(&vdp->xdf_dev_lk); 2070 2071 if (vdp->xdf_status == XD_READY) { 2072 mutex_enter(&vdp->xdf_dev_lk); 2073 vdp->xdf_status = XD_CLOSING; 2074 mutex_exit(&vdp->xdf_dev_lk); 2075 2076 #ifdef DOMU_BACKEND 2077 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 2078 #endif 2079 2080 xdf_post_disconnect(vdp); 2081 (void) xvdi_switch_state(dip, XBT_NULL, 2082 XenbusStateClosed); 2083 } else if ((vdp->xdf_status == XD_INIT) || 2084 (vdp->xdf_status == XD_CLOSING)) { 2085 xdf_post_disconnect(vdp); 2086 } else { 2087 mutex_enter(&vdp->xdf_dev_lk); 2088 vdp->xdf_status = XD_CLOSED; 2089 mutex_exit(&vdp->xdf_dev_lk); 2090 } 2091 } 2092 2093 /* notify anybody waiting for oe state change */ 2094 mutex_enter(&vdp->xdf_dev_lk); 2095 cv_broadcast(&vdp->xdf_dev_cv); 2096 mutex_exit(&vdp->xdf_dev_lk); 2097 2098 status = vdp->xdf_status; 2099 mutex_exit(&vdp->xdf_cb_lk); 2100 2101 if (status == XD_READY) { 2102 xdf_iostart(vdp); 2103 } else if ((status == XD_CLOSED) && !unexpect_die) { 2104 /* interface is closed successfully, remove all minor nodes */ 2105 if (vdp->xdf_vd_lbl != NULL) { 2106 cmlb_detach(vdp->xdf_vd_lbl, NULL); 2107 cmlb_free_handle(&vdp->xdf_vd_lbl); 2108 vdp->xdf_vd_lbl = NULL; 2109 } 2110 } 2111 } 2112 2113 /* check if partition is open, -1 - check all partitions on the disk */ 2114 static boolean_t 2115 xdf_isopen(xdf_t *vdp, int partition) 2116 { 2117 int i; 2118 ulong_t parbit; 2119 boolean_t rval = B_FALSE; 2120 2121 ASSERT((partition == -1) || 2122 ((partition >= 0) || (partition < XDF_PEXT))); 2123 2124 if (partition == -1) 2125 parbit = (ulong_t)-1; 2126 else 2127 parbit = 1 << partition; 2128 2129 for (i = 0; i < OTYPCNT; i++) { 2130 if (vdp->xdf_vd_open[i] & parbit) 2131 rval = B_TRUE; 2132 } 2133 2134 return (rval); 2135 } 2136 2137 /* 2138 * Xdf_check_state_transition will check the XenbusState change to see 2139 * if the change is a valid transition or not. 
 * The new state is written by the backend domain, or by running
 * xenstore-write to change it manually in dom0.
 */
static int
xdf_check_state_transition(xdf_t *vdp, XenbusState oestate)
{
	int status;
	int stcheck;
#define	STOK	0 /* needs further processing */
#define	STNOP	1 /* no action needed */
#define	STBUG	2 /* unexpected state change, could be a bug */

	status = vdp->xdf_status;
	stcheck = STOK;

	switch (status) {
	case XD_UNKNOWN:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateConnected))
			stcheck = STBUG;
		else if ((oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STNOP;
		break;
	case XD_INIT:
		if (oestate == XenbusStateUnknown)
			stcheck = STBUG;
		else if ((oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STNOP;
		break;
	case XD_READY:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STBUG;
		else if (oestate == XenbusStateConnected)
			stcheck = STNOP;
		break;
	case XD_CLOSING:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised) ||
		    (oestate == XenbusStateConnected))
			stcheck = STBUG;
		else if (oestate == XenbusStateClosing)
			stcheck = STNOP;
		break;
	case XD_CLOSED:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateConnected))
			stcheck = STBUG;
		else if ((oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised) ||
		    (oestate == XenbusStateClosing) ||
		    (oestate == XenbusStateClosed))
			stcheck = STNOP;
		break;
	case XD_SUSPEND:
	default:
		stcheck = STBUG;
	}

	if (stcheck == STOK)
		return (DDI_SUCCESS);

	if (stcheck == STBUG)
		cmn_err(CE_NOTE, "xdf@%s: unexpected otherend "
		    "state change to %d when status is %d",
		    ddi_get_name_addr(vdp->xdf_dip), oestate, status);

	return (DDI_FAILURE);
}

static int
xdf_connect(xdf_t *vdp, boolean_t wait)
{
	ASSERT(mutex_owned(&vdp->xdf_dev_lk));
	while (vdp->xdf_status != XD_READY) {
		if (!wait || (vdp->xdf_status > XD_READY))
			break;

		if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
			break;
	}

	return (vdp->xdf_status);
}

/*
 * Callback invoked when DMA/GTE resources become available.
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
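 *
 * The callback itself only triggers the soft interrupt; xdf_iorestart()
 * then clears the callback-pending flag and calls xdf_iostart() to
 * resubmit any queued I/O.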
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	ASSERT(vdp != NULL);

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    ddi_get_name_addr(vdp->xdf_dip)));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}

static uint_t
xdf_iorestart(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;

	ASSERT(vdp != NULL);

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(ISDMACBON(vdp));
	SETDMACBOFF(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* a new timeout could be re-scheduled */
	xdf_iostart(vdp);
}

/*
 * Alloc a vreq for this bp.
 * bp->av_back contains the pointer to the vreq upon return.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP2VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
	bp->av_back = (buf_t *)vreq;
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	/* init of other fields in vreq is up to the caller */

	return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t *bp = vreq->v_buf;

	list_remove(&vdp->xdf_vreq_act, (void *)vreq);

	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
		goto done;

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
done:
	vreq->v_buf->av_back = NULL;
	kmem_cache_free(xdf_vreq_cache, vreq);
}

/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ?
	    DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, (
				    "xdf@%s: get ge_slot failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_gs = gs;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->vreq = vreq;
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		bp->b_private = NULL;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work around CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA "
				    "handle alloc failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);

			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, (
				    "xdf@%s: DMA mem allocation failed\n",
				    ddi_get_name_addr(vdp->xdf_dip)));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl,
				    &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get a ge_slot; the callback is set by gs_get() upon
		 * failure, if not set previously
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	case VREQ_GS_ALLOCED:
		/* nothing needs to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    ddi_get_name_addr(vdp->xdf_dip)));
			return (DDI_FAILURE);
		}

		vreq->v_gs = gs;
		gs->vreq = vreq;
		vreq->v_dmaw++;
		rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs);
		ASSERT(rc == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* try to alloc the GTEs needed in this slot first */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init gs_slot */
	list_insert_head(&vdp->xdf_gs_act, (void *)gs);
	gs->oeid = vdp->xdf_peer;
	gs->isread = isread;
	gs->ghead = gh;
	gs->ngrefs = 0;

	return (gs);
}

static void
gs_free(xdf_t *vdp, ge_slot_t *gs)
{
	int i;
	grant_ref_t *gp = gs->ge;
	int ngrefs = gs->ngrefs;
	boolean_t isread = gs->isread;

	list_remove(&vdp->xdf_gs_act, (void *)gs);

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < ngrefs; i++, gp++)
		gnttab_end_foreign_access(*gp, !isread, 0);
	gnttab_free_grant_references(gs->ghead);

	kmem_cache_free(xdf_gs_cache, (void *)gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);

	ASSERT(gr != -1);
	ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->ge[gs->ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);

	return (gr);
}

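/*
 * unexpectedie() cleans up after the backend closes while the device is
 * still open: consume any responses already sitting on the ring, release
 * all grant table entries, and move the pending bufs back onto the
 * driver's buf queue so that they can be reissued later.
 */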
static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in the ring that have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * move the bufs back to the active list in order;
	 * vreq_busy is updated in vreq_free()
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of the list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		if (vdp->xdf_xdev_iostat != NULL)
			kstat_runq_back_to_waitq(
			    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}

void
xdf_kstat_delete(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t	*kstat;

	/*
	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of our kstat.  xdf_iostat_lk is used
	 * to protect the allocation and freeing of the actual kstat.
	 * xdf_dev_lk can't be used for this purpose because kstat
	 * readers use it to access the contents of the kstat and
	 * hence it can't be held when calling kstat_delete().
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return;
	}

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);

	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}

int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* See the comment about locking in xdf_kstat_delete().
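	 * As in xdf_kstat_delete(), take xdf_iostat_lk before xdf_dev_lk so
	 * that kstat creation cannot race with deletion of the same kstat.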
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat != NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	if ((vdp->xdf_xdev_iostat = kstat_create(
	    ks_module, ks_instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
	kstat_install(vdp->xdf_xdev_iostat);
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_iostat_lk);

	return (0);
}

#if defined(XPV_HVM_DRIVER)

typedef struct xdf_hvm_entry {
	list_node_t	xdf_he_list;
	char		*xdf_he_path;
	dev_info_t	*xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;

static xdf_hvm_entry_t *
i_xdf_hvm_find(char *path, dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	ASSERT((path != NULL) || (dip != NULL));
	ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

	i = list_head(&xdf_hvm_list);
	while (i != NULL) {
		if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		if ((dip != NULL) && (i->xdf_he_dip != dip)) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		break;
	}
	return (i);
}

dev_info_t *
xdf_hvm_hold(char *path)
{
	xdf_hvm_entry_t	*i;
	dev_info_t	*dip;

	mutex_enter(&xdf_hvm_list_lock);
	i = i_xdf_hvm_find(path, NULL);
	if (i == NULL) {
		mutex_exit(&xdf_hvm_list_lock);
		return (NULL);
	}
	ndi_hold_devi(dip = i->xdf_he_dip);
	mutex_exit(&xdf_hvm_list_lock);
	return (dip);
}

static void
xdf_hvm_add(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;
	char		*path;

	/* figure out the path for the dip */
	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	i = kmem_alloc(sizeof (*i), KM_SLEEP);
	i->xdf_he_dip = dip;
	i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

	mutex_enter(&xdf_hvm_list_lock);
	ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
	ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
	list_insert_head(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	mutex_enter(&xdf_hvm_list_lock);
	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
	list_remove(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
	kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
	    offsetof(xdf_hvm_entry_t, xdf_he_list));
	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
	ASSERT(list_head(&xdf_hvm_list) == NULL);
	list_destroy(&xdf_hvm_list);
	mutex_destroy(&xdf_hvm_list_lock);
}

int
xdf_hvm_connect(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	int	rv;

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_connect(vdp, B_TRUE);
	mutex_exit(&vdp->xdf_dev_lk);
	return ((rv == XD_READY) ? 0 : -1);
}

int
xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* sanity check the requested physical geometry */
	mutex_enter(&vdp->xdf_dev_lk);
	if ((geomp->g_secsize != XB_BSIZE) ||
	    (geomp->g_capacity == 0)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	/*
	 * If we've already connected to the backend device then make sure
	 * we're not defining a physical geometry larger than our backend
	 * device.
	 */
	if ((vdp->xdf_xdev_nblocks != 0) &&
	    (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EINVAL);
	}

	vdp->xdf_pgeom = *geomp;
	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	return (0);
}

#endif /* XPV_HVM_DRIVER */