/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/cmlb.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
#endif /* XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <io/xdf.h>

#define	FLUSH_DISKCACHE	0x1
#define	WRITE_BARRIER	0x2
#define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp)				\
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp)			\
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp)			\
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&	\
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp)				\
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

static void *vbd_ss;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
int xdfdebug = 0;
extern int do_polled_io;
diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
int xdf_barrier_flush_disable = 0;

/*
 * dev_ops and cb_ops entrypoints
 */
static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
static int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
static int xdf_reset(dev_info_t *, ddi_reset_cmd_t);
static int xdf_open(dev_t *, int, int, cred_t *);
static int xdf_close(dev_t, int, int, struct cred *);
static int xdf_strategy(struct buf *);
static int xdf_read(dev_t, struct uio *, cred_t *);
static int xdf_aread(dev_t, struct aio_req *, cred_t *);
static int xdf_write(dev_t, struct uio *, cred_t *);
static int xdf_awrite(dev_t, struct aio_req *, cred_t *);
static int xdf_dump(dev_t, caddr_t, daddr_t, int);
static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static uint_t xdf_intr(caddr_t);
static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
	caddr_t, int *);
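/*
 * Locking note (as used throughout this file): when both locks are held,
 * xdf_cb_lk is acquired before xdf_dev_lk.  xdf_dev_lk protects the active
 * buf list (xdf_f_act/xdf_l_act) and the xdf_status state machine.
 */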

/*
 * misc private functions
 */
static int xdf_suspend(dev_info_t *);
static int xdf_resume(dev_info_t *);
static int xdf_start_connect(xdf_t *);
static int xdf_start_disconnect(xdf_t *);
static int xdf_post_connect(xdf_t *);
static void xdf_post_disconnect(xdf_t *);
static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *);
static void xdf_iostart(xdf_t *);
static void xdf_iofini(xdf_t *, uint64_t, int);
static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *);
static int xdf_drain_io(xdf_t *);
static boolean_t xdf_isopen(xdf_t *, int);
static int xdf_check_state_transition(xdf_t *, XenbusState);
static int xdf_connect(xdf_t *, boolean_t);
static int xdf_dmacallback(caddr_t);
static void xdf_timeout_handler(void *);
static uint_t xdf_iorestart(caddr_t);
static v_req_t *vreq_get(xdf_t *, buf_t *);
static void vreq_free(xdf_t *, v_req_t *);
static int vreq_setup(xdf_t *, v_req_t *);
static ge_slot_t *gs_get(xdf_t *, int);
static void gs_free(xdf_t *, ge_slot_t *);
static grant_ref_t gs_grant(ge_slot_t *, mfn_t);
static void unexpectedie(xdf_t *);
static void xdfmin(struct buf *);
static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);

#if defined(XPV_HVM_DRIVER)
static void xdf_hvm_add(dev_info_t *);
static void xdf_hvm_rm(dev_info_t *);
static void xdf_hvm_init(void);
static void xdf_hvm_fini(void);
#endif /* XPV_HVM_DRIVER */

static struct cb_ops xdf_cbops = {
	xdf_open,
	xdf_close,
	xdf_strategy,
	nodev,
	xdf_dump,
	xdf_read,
	xdf_write,
	xdf_ioctl,
	nodev,
	nodev,
	nodev,
	nochpoll,
	xdf_prop_op,
	NULL,
	D_MP | D_NEW | D_64BIT,
	CB_REV,
	xdf_aread,
	xdf_awrite
};

struct dev_ops xdf_devops = {
	DEVO_REV,			/* devo_rev */
	0,				/* devo_refcnt */
	xdf_getinfo,			/* devo_getinfo */
	nulldev,			/* devo_identify */
	nulldev,			/* devo_probe */
	xdf_attach,			/* devo_attach */
	xdf_detach,			/* devo_detach */
	xdf_reset,			/* devo_reset */
	&xdf_cbops,			/* devo_cb_ops */
	(struct bus_ops *)NULL,		/* devo_bus_ops */
	NULL,				/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"virtual block driver",	/* short description */
	&xdf_devops		/* driver specific ops */
};

static struct modlinkage xdf_modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * I/O buffer DMA attributes
 * Make sure one DMA window contains at most BLKIF_MAX_SEGMENTS_PER_REQUEST
 * segments
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length */
	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
	XB_BSIZE,			/* granularity */
	0,				/* flags (reserved) */
};

static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/* callbacks from common label */

int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};

int
_init(void)
{
	int rc;

	if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0)
		return (rc);

	xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
	    sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
	    sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_init();
#endif /* XPV_HVM_DRIVER */

	if ((rc = mod_install(&xdf_modlinkage)) != 0) {
#if defined(XPV_HVM_DRIVER)
		xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */
		kmem_cache_destroy(xdf_vreq_cache);
		kmem_cache_destroy(xdf_gs_cache);
		ddi_soft_state_fini(&vbd_ss);
		return (rc);
	}

	return (rc);
}

int
_fini(void)
{

	int err;
	if ((err = mod_remove(&xdf_modlinkage)) != 0)
		return (err);

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_fini();
#endif /* XPV_HVM_DRIVER */

	kmem_cache_destroy(xdf_vreq_cache);
	kmem_cache_destroy(xdf_gs_cache);
	ddi_soft_state_fini(&vbd_ss);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdf_modlinkage, modinfop));
}

/*ARGSUSED*/
static int
xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
{
	int instance;
	xdf_t *vbdp;

	instance = XDF_INST(getminor((dev_t)arg));

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) {
			*rp = NULL;
			return (DDI_FAILURE);
		}
		*rp = vbdp->xdf_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*rp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(dip))) == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(vdp->xdf_vd_lbl,
	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
	    XDF_PART(getminor(dev)), NULL));
}

static int
xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	xdf_t *vdp;
	ddi_iblock_cookie_t softibc;
	int instance;

	xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
	    "xdfdebug", 0);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (xdf_resume(devi));

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);
	ddi_set_driver_private(devi, vdp);
	vdp->xdf_dip = devi;
	cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);

	if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
	mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
	mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
	mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER,
	    (void *)vdp->xdf_ibc);

	if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
	if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
	    &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

#if !defined(XPV_HVM_DRIVER)
	/* create kstat for iostat(1M) */
	if (xdf_kstat_create(devi, "xdf", instance) != 0) {
		cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
		    ddi_get_name_addr(devi));
		goto errout0;
	}
#endif /* !XPV_HVM_DRIVER */

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

	/*
	 * Initialize the physical geometry structure.  Note that currently
	 * we don't know the size of the backend device so the number
	 * of blocks on the device will be initialized to zero.  Once
	 * we connect to the backend device we'll update the physical
	 * geometry to reflect the real size of the device.
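	 * (xdf_post_connect() re-runs xdf_synthetic_pgeom() the first time
	 * we connect and learn the real capacity.)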
	 */
	xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/*
	 * create default device minor nodes: non-removable disk
	 * we will adjust minor nodes after we are connected w/ backend
	 */
	cmlb_alloc_handle(&vdp->xdf_vd_lbl);
	if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1,
	    DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
	    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
	    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
	    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
	    vdp->xdf_vd_lbl, NULL) != 0) {
		cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed",
		    ddi_get_name_addr(devi));
		goto errout0;
	}

	/*
	 * We ship with cache-enabled disks
	 */
	vdp->xdf_wce = 1;

	mutex_enter(&vdp->xdf_cb_lk);

	/* Watch backend XenbusState change */
	if (xvdi_add_event_handler(devi, XS_OE_STATE, xdf_oe_change,
	    NULL) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout0;
	}

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: start connection failed",
		    ddi_get_name_addr(devi));
		(void) xdf_start_disconnect(vdp);
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout1;
	}

	mutex_exit(&vdp->xdf_cb_lk);

	list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
	    offsetof(v_req_t, v_link));
	list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t),
	    offsetof(ge_slot_t, link));

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_add(devi);

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1);

	/*
	 * Report our version to dom0.
	 */
	if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d",
	    HVMPV_XDF_VERS))
		cmn_err(CE_WARN, "xdf: couldn't write version\n");
#endif /* XPV_HVM_DRIVER */

	ddi_report_dev(devi);

	DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance));

	return (DDI_SUCCESS);

errout1:
	xvdi_remove_event_handler(devi, XS_OE_STATE);
errout0:
	if (vdp->xdf_vd_lbl != NULL) {
		cmlb_detach(vdp->xdf_vd_lbl, NULL);
		cmlb_free_handle(&vdp->xdf_vd_lbl);
		vdp->xdf_vd_lbl = NULL;
	}
#if !defined(XPV_HVM_DRIVER)
	xdf_kstat_delete(devi);
#endif /* !XPV_HVM_DRIVER */
	if (vdp->xdf_softintr_id != NULL)
		ddi_remove_softintr(vdp->xdf_softintr_id);
	if (vdp->xdf_ibc != NULL) {
		mutex_destroy(&vdp->xdf_cb_lk);
		mutex_destroy(&vdp->xdf_dev_lk);
	}
	cv_destroy(&vdp->xdf_dev_cv);
	ddi_soft_state_free(vbd_ss, instance);
	ddi_set_driver_private(devi, NULL);
	ddi_prop_remove_all(devi);
	cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi));
	return (DDI_FAILURE);
}

static int
xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	switch (cmd) {

	case DDI_PM_SUSPEND:
		break;

	case DDI_SUSPEND:
		return (xdf_suspend(devi));

	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance));
	vdp = ddi_get_soft_state(vbd_ss, instance);

	if (vdp == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_dev_lk);
	if (xdf_isopen(vdp, -1)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

	if (vdp->xdf_status != XD_CLOSED) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_FAILURE);
	}

#if defined(XPV_HVM_DRIVER)
	xdf_hvm_rm(devi);
#endif /* XPV_HVM_DRIVER */

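	/*
	 * At this point every minor is closed and the state machine is at
	 * XD_CLOSED, so no DMA-resource callback should still be pending.
	 */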
	ASSERT(!ISDMACBON(vdp));
	mutex_exit(&vdp->xdf_dev_lk);

	if (vdp->xdf_timeout_id != 0)
		(void) untimeout(vdp->xdf_timeout_id);

	xvdi_remove_event_handler(devi, XS_OE_STATE);

	/* we'll support backend running in domU later */
#ifdef DOMU_BACKEND
	(void) xvdi_post_event(devi, XEN_HP_REMOVE);
#endif

	list_destroy(&vdp->xdf_vreq_act);
	list_destroy(&vdp->xdf_gs_act);
	ddi_prop_remove_all(devi);
	xdf_kstat_delete(devi);
	ddi_remove_softintr(vdp->xdf_softintr_id);
	ddi_set_driver_private(devi, NULL);
	cv_destroy(&vdp->xdf_dev_cv);
	mutex_destroy(&vdp->xdf_cb_lk);
	mutex_destroy(&vdp->xdf_dev_lk);
	if (vdp->xdf_cache_flush_block != NULL)
		kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
	ddi_soft_state_free(vbd_ss, instance);
	return (DDI_SUCCESS);
}

static int
xdf_suspend(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;
	enum xdf_state st;

	instance = ddi_get_instance(devi);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: xdf#%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	xvdi_suspend(devi);

	mutex_enter(&vdp->xdf_cb_lk);
	mutex_enter(&vdp->xdf_dev_lk);
	st = vdp->xdf_status;
	/* change status to stop further I/O requests */
	if (st == XD_READY)
		vdp->xdf_status = XD_SUSPEND;
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	/* make sure no more I/O responses left in the ring buffer */
	if ((st == XD_INIT) || (st == XD_READY)) {
#ifdef XPV_HVM_DRIVER
		ec_unbind_evtchn(vdp->xdf_evtchn);
		xvdi_free_evtchn(devi);
#else /* !XPV_HVM_DRIVER */
		(void) ddi_remove_intr(devi, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
		(void) xdf_drain_io(vdp);
		/*
		 * no need to tear down the ring buffer here
		 * it will simply be re-init'ed during resume when
		 * we call xvdi_alloc_ring
		 */
	}

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_suspend: SUCCESS\n");

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_resume(dev_info_t *devi)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: xdf%d\n", instance);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	mutex_enter(&vdp->xdf_cb_lk);

	if (xvdi_resume(devi) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_enter(&vdp->xdf_dev_lk);
	ASSERT(vdp->xdf_status != XD_READY);
	vdp->xdf_status = XD_UNKNOWN;
	mutex_exit(&vdp->xdf_dev_lk);

	if (xdf_start_connect(vdp) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (DDI_FAILURE);
	}

	mutex_exit(&vdp->xdf_cb_lk);

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_resume: done\n");
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
{
	xdf_t *vdp;
	int instance;

	instance = ddi_get_instance(devi);
	DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance));
	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (DDI_FAILURE);

	/*
	 * wait for any outstanding I/O to complete
	 */
	(void) xdf_drain_io(vdp);

	DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance));
	return (DDI_SUCCESS);
}

static int
xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t minor;
	xdf_t *vdp;
	int part;
	ulong_t parbit;
	diskaddr_t p_blkct = 0;
	boolean_t firstopen;
	boolean_t nodelay;

	minor = getminor(*devp);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	nodelay = (flag & (FNDELAY | FNONBLOCK));

	DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor)));

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}

	if ((flag & FWRITE) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EROFS);
	}

	part = XDF_PART(minor);
	parbit = 1 << part;
	if ((vdp->xdf_vd_exclopen & parbit) ||
	    ((flag & FEXCL) && xdf_isopen(vdp, part))) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (EBUSY);
	}

	/* are we the first one to open this node? */
	firstopen = !xdf_isopen(vdp, -1);

	if (otyp == OTYP_LYR)
		vdp->xdf_vd_lyropen[part]++;

	vdp->xdf_vd_open[otyp] |= parbit;

	if (flag & FEXCL)
		vdp->xdf_vd_exclopen |= parbit;

	mutex_exit(&vdp->xdf_dev_lk);

	/* force a re-validation */
	if (firstopen)
		cmlb_invalidate(vdp->xdf_vd_lbl, NULL);

	/*
	 * check size
	 * ignore CD/DVD which contains a zero-sized s0
	 */
	if (!nodelay && !XD_IS_CD(vdp) &&
	    ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
	    NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) {
		(void) xdf_close(*devp, flag, otyp, credp);
		return (ENXIO);
	}

	return (0);
}

/*ARGSUSED*/
static int
xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t minor;
	xdf_t *vdp;
	int part;
	ulong_t parbit;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (ENXIO);
	}
	parbit = 1 << part;

	ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
	if (otyp == OTYP_LYR) {
		ASSERT(vdp->xdf_vd_lyropen[part] > 0);
		if (--vdp->xdf_vd_lyropen[part] == 0)
			vdp->xdf_vd_open[otyp] &= ~parbit;
	} else {
		vdp->xdf_vd_open[otyp] &= ~parbit;
	}
	vdp->xdf_vd_exclopen &= ~parbit;

	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_strategy(struct buf *bp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkct, p_blkst;
	ulong_t nblks;
	int part;

	minor = getminor(bp->b_edev);
	part = XDF_PART(minor);

	vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor));
	if ((vdp == NULL) || !xdf_isopen(vdp, part)) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Check for writes to a read only device */
	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
		bioerror(bp, EROFS);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Check if this I/O is accessing a partition or the entire disk */
	if ((long)bp->b_private == XB_SLICE_NONE) {
		/* This I/O is using an absolute offset */
		p_blkct = vdp->xdf_xdev_nblocks;
		p_blkst = 0;
	} else {
		/* This I/O is using a partition relative offset */
		if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
		    &p_blkst, NULL, NULL, NULL)) {
			bioerror(bp, ENXIO);
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (0);
		}
	}

	/* check for a starting block beyond the disk or partition limit */
	if (bp->b_blkno > p_blkct) {
		DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64,
		    (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
		bioerror(bp, EINVAL);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Legacy: don't set error flag in this case */
	if (bp->b_blkno == p_blkct) {
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* Adjust for partial transfer */
	nblks = bp->b_bcount >> XB_BSHIFT;
	if ((bp->b_blkno + nblks) > p_blkct) {
		bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
		bp->b_bcount -= bp->b_resid;
	}

	DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n",
	    (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));

	/* Fix up the buf struct */
	bp->b_flags |= B_BUSY;
	bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */
	bp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	mutex_exit(&vdp->xdf_dev_lk);

	xdf_iostart(vdp);
	if (do_polled_io)
		(void) xdf_drain_io(vdp);
	return (0);
}

/*ARGSUSED*/
static int
xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{

	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
}

/*ARGSUSED*/
static int
xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n",
	    (int64_t)uiop->uio_offset));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop));
}

/*ARGSUSED*/
static int
xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop));
}

/*ARGSUSED*/
static int
xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
	xdf_t *vdp;
	minor_t minor;
	struct uio *uiop = aiop->aio_uio;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
	    NULL, NULL, NULL, NULL))
		return (ENXIO);

	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
		return (ENOSPC);

	if (U_INVAL(uiop))
		return (EINVAL);

	return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop));
}

static int
xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	struct buf dumpbuf, *dbp;
	xdf_t *vdp;
	minor_t minor;
	int err = 0;
	int part;
	diskaddr_t p_blkcnt, p_blkst;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n",
	    (void *)addr, blkno, nblk));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
	    NULL, NULL, NULL))
		return (ENXIO);

	if ((blkno + nblk) > p_blkcnt) {
		cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64,
		    blkno + nblk, (uint64_t)p_blkcnt);
		return (EINVAL);
	}

	dbp = &dumpbuf;
	bioinit(dbp);
	dbp->b_flags = B_BUSY;
	dbp->b_un.b_addr = addr;
	dbp->b_bcount = nblk << DEV_BSHIFT;
	dbp->b_blkno = blkno;
	dbp->b_edev = dev;
	dbp->b_private = (void *)(uintptr_t)p_blkst;

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
		vdp->xdf_f_act = vdp->xdf_l_act = dbp;
	} else {
		vdp->xdf_l_act->av_forw = dbp;
		vdp->xdf_l_act = dbp;
	}
	dbp->av_forw = NULL;
	dbp->av_back = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = xdf_drain_io(vdp);
	biofini(dbp);
	return (err);
}

/*ARGSUSED*/
static int
xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int instance;
	xdf_t *vdp;
	minor_t minor;
	int part;

	minor = getminor(dev);
	instance = XDF_INST(minor);

	if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
		return (ENXIO);

	DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n",
	    instance, cmd, cmd));

	part = XDF_PART(minor);
	if (!xdf_isopen(vdp, part))
		return (ENXIO);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo media_info;

		media_info.dki_lbsize = DEV_BSIZE;
		media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
		media_info.dki_media_type = DK_FIXED_DISK;

		if (ddi_copyout(&media_info, (void *)arg,
		    sizeof (struct dk_minfo), mode)) {
			return (EFAULT);
		} else {
			return (0);
		}
	}

	case DKIOCINFO: {
		struct dk_cinfo info;

		/* controller information */
		if (XD_IS_CD(vdp))
			info.dki_ctype = DKC_CDROM;
		else
			info.dki_ctype = DKC_VBD;

		info.dki_cnum = 0;
		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);

		/* unit information */
		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
		info.dki_flags = DKI_FMTVOL;
		info.dki_partition = part;
		info.dki_maxtransfer = maxphys / DEV_BSIZE;
		info.dki_addr = 0;
		info.dki_space = 0;
		info.dki_prio = 0;
		info.dki_vec = 0;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
			return (EFAULT);
		else
			return (0);
	}

	case DKIOCSTATE: {
		enum dkio_state dkstate = DKIO_INSERTED;
		if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate),
		    mode) != 0)
			return (EFAULT);
		return (0);
	}

	/*
	 * is media removable?
	 */
	case DKIOCREMOVABLE: {
		int i = XD_IS_RM(vdp) ? 1 : 0;
		if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode))
			return (EFAULT);
		return (0);
	}

	case DKIOCG_PHYGEOM:
	case DKIOCG_VIRTGEOM:
	case DKIOCGGEOM:
	case DKIOCSGEOM:
	case DKIOCGAPART:
	case DKIOCSAPART:
	case DKIOCGVTOC:
	case DKIOCSVTOC:
	case DKIOCPARTINFO:
	case DKIOCGEXTVTOC:
	case DKIOCSEXTVTOC:
	case DKIOCEXTPARTINFO:
	case DKIOCGMBOOT:
	case DKIOCSMBOOT:
	case DKIOCGETEFI:
	case DKIOCSETEFI:
	case DKIOCSETEXTPART:
	case DKIOCPARTITION: {
		int rc;

		rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
		    rvalp, NULL);
		return (rc);
	}

	case DKIOCGETWCE:
		if (ddi_copyout(&vdp->xdf_wce, (void *)arg,
		    sizeof (vdp->xdf_wce), mode))
			return (EFAULT);
		return (0);
	case DKIOCSETWCE:
		if (ddi_copyin((void *)arg, &vdp->xdf_wce,
		    sizeof (vdp->xdf_wce), mode))
			return (EFAULT);
		return (0);
	case DKIOCFLUSHWRITECACHE: {
		int rc;
		struct dk_callback *dkc = (struct dk_callback *)arg;

		if (vdp->xdf_flush_supported) {
			rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    NULL, 0, 0, (void *)dev);
		} else if (vdp->xdf_feature_barrier &&
		    !xdf_barrier_flush_disable) {
			rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    vdp->xdf_cache_flush_block, xdf_flush_block,
			    DEV_BSIZE, (void *)dev);
		} else {
			return (ENOTTY);
		}
		if ((mode & FKIOCTL) && (dkc != NULL) &&
		    (dkc->dkc_callback != NULL)) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, rc);
			/* need to return 0 after calling callback */
			rc = 0;
		}
		return (rc);
	}

	default:
		return (ENOTTY);
	}
}

/*
 * xdf interrupt handler
 */
static uint_t
xdf_intr(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	xendev_ring_t *xbr;
	blkif_response_t *resp;
	int bioerr;
	uint64_t id;
	extern int do_polled_io;
	uint8_t op;
	uint16_t status;
	ddi_acc_handle_t acchdl;

	mutex_enter(&vdp->xdf_dev_lk);

	if ((xbr = vdp->xdf_xb_ring) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		return (DDI_INTR_UNCLAIMED);
	}

	acchdl = vdp->xdf_xb_ring_hdl;

	/*
	 * complete all requests which have a response
	 */
	while (resp = xvdi_ring_get_response(xbr)) {
		id = ddi_get64(acchdl, &resp->id);
		op = ddi_get8(acchdl, &resp->operation);
		status = ddi_get16(acchdl, (uint16_t *)&resp->status);
		DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
		    op, id, status));

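		/*
		 * The id field carries the ge_slot_t pointer we stashed in
		 * the request (see xdf_prepare_rreq()); xdf_iofini() uses it
		 * to release the grant entries and complete the buf.
		 */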
		/*
		 * XXPV - close connection to the backend and restart
		 */
		if (status != BLKIF_RSP_OKAY) {
			DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
			    ddi_get_name_addr(vdp->xdf_dip),
			    (op == BLKIF_OP_READ) ? "reading" : "writing"));
			bioerr = EIO;
		} else {
			bioerr = 0;
		}

		xdf_iofini(vdp, id, bioerr);
	}

	mutex_exit(&vdp->xdf_dev_lk);

	if (!do_polled_io)
		xdf_iostart(vdp);

	return (DDI_INTR_CLAIMED);
}

int xdf_fbrewrites;	/* how many times was our flush block rewritten */

/*
 * Snarf new data if our flush block was re-written
 */
static void
check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
{
	int nblks;
	boolean_t mapin;

	if (IS_WRITE_BARRIER(vdp, bp))
		return; /* write was a flush write */

	mapin = B_FALSE;
	nblks = bp->b_bcount >> DEV_BSHIFT;
	if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
		xdf_fbrewrites++;
		if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
			mapin = B_TRUE;
			bp_mapin(bp);
		}
		bcopy(bp->b_un.b_addr +
		    ((xdf_flush_block - blkno) << DEV_BSHIFT),
		    vdp->xdf_cache_flush_block, DEV_BSIZE);
		if (mapin)
			bp_mapout(bp);
	}
}

static void
xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr)
{
	ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
	v_req_t *vreq = gs->vreq;
	buf_t *bp = vreq->v_buf;

	gs_free(vdp, gs);
	if (bioerr)
		bioerror(bp, bioerr);
	vreq->v_nslots--;
	if (vreq->v_nslots != 0)
		return;

	XDF_UPDATE_IO_STAT(vdp, bp);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));

	if (IS_ERROR(bp))
		bp->b_resid = bp->b_bcount;

	vreq_free(vdp, vreq);
	biodone(bp);
}

/*
 * return value of xdf_prepare_rreq()
 * used in xdf_iostart()
 */
#define	XF_PARTIAL	0 /* rreq is full, not all I/O in buf transferred */
#define	XF_COMP		1 /* no more I/O left in buf */

static void
xdf_iostart(xdf_t *vdp)
{
	xendev_ring_t *xbr;
	struct buf *bp;
	blkif_request_t *rreq;
	int retval;
	int rreqready = 0;

	xbr = vdp->xdf_xb_ring;

	/*
	 * populate the ring request(s)
	 *
	 * loop until there is no buf to transfer or no free slot
	 * available in I/O ring
	 */
	mutex_enter(&vdp->xdf_dev_lk);

	for (;;) {
		if (vdp->xdf_status != XD_READY)
			break;

		/* active buf queue empty? */
		if ((bp = vdp->xdf_f_act) == NULL)
			break;

		/* try to grab a vreq for this bp */
		if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL))
			break;
		/* alloc DMA/GTE resources */
		if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS)
			break;

		/* get next blkif_request in the ring */
		if ((rreq = xvdi_ring_get_request(xbr)) == NULL)
			break;
		bzero(rreq, sizeof (blkif_request_t));

		/* populate blkif_request with this buf */
		rreqready++;
		retval = xdf_prepare_rreq(vdp, bp, rreq);
		if (retval == XF_COMP) {
			/* finish this bp, switch to next one */
			if (vdp->xdf_xdev_iostat != NULL)
				kstat_waitq_to_runq(
				    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
			vdp->xdf_f_act = bp->av_forw;
			bp->av_forw = NULL;
		}
	}

	/*
	 * Send the request(s) to the backend
	 */
	if (rreqready) {
		if (xvdi_ring_push_request(xbr)) {
			DPRINTF(IO_DBG, ("xdf_iostart: "
			    "sent request(s) to backend\n"));
			xvdi_notify_oe(vdp->xdf_dip);
		}
	}

	mutex_exit(&vdp->xdf_dev_lk);
}

/*
 * populate a single blkif_request_t w/ a buf
 */
static int
xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
	int rval;
	grant_ref_t gr;
	uint8_t fsect, lsect;
	size_t bcnt;
	paddr_t dma_addr;
	off_t blk_off;
	dev_info_t *dip = vdp->xdf_dip;
	blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
	v_req_t *vreq = BP2VREQ(bp);
	uint64_t blkno = vreq->v_blkno;
	uint_t ndmacs = vreq->v_ndmacs;
	ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
	int seg = 0;
	int isread = IS_READ(bp);

	if (isread)
		ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
	else {
		switch (vreq->v_flush_diskcache) {
		case FLUSH_DISKCACHE:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_FLUSH_DISKCACHE);
			ddi_put16(acchdl, &rreq->handle, vdev);
			ddi_put64(acchdl, &rreq->id,
			    (uint64_t)(uintptr_t)(vreq->v_gs));
			ddi_put8(acchdl, &rreq->nr_segments, 0);
			return (XF_COMP);
		case WRITE_BARRIER:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_WRITE_BARRIER);
			break;
		default:
			if (!vdp->xdf_wce)
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE_BARRIER);
			else
				ddi_put8(acchdl, &rreq->operation,
				    BLKIF_OP_WRITE);
			break;
		}
	}

	ddi_put16(acchdl, &rreq->handle, vdev);
	ddi_put64(acchdl, &rreq->sector_number, blkno);
	ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs));

	/*
	 * loop until all segments are populated or no more dma cookie in buf
	 */
	for (;;) {
		/*
		 * Each segment of a blkif request can transfer up to
		 * one 4K page of data.
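		 * The DMA attributes above cap a window at
		 * BLKIF_MAX_SEGMENTS_PER_REQUEST cookies of at most one page
		 * each, so a single DMA window always fits in one request.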
		 */
		bcnt = vreq->v_dmac.dmac_size;
		ASSERT(bcnt <= PAGESIZE);
		ASSERT((bcnt % XB_BSIZE) == 0);
		dma_addr = vreq->v_dmac.dmac_laddress;
		blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
		ASSERT((blk_off & XB_BMASK) == 0);
		fsect = blk_off >> XB_BSHIFT;
		lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
		ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
		    lsect < XB_MAX_SEGLEN / XB_BSIZE);
		DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n",
		    seg, vreq->v_dmac.dmac_size, blk_off));
		gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT);
		ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
		ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
		ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
		DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64
		    "\n", seg, fsect, lsect, gr, dma_addr));

		blkno += (bcnt >> XB_BSHIFT);
		seg++;
		ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		if (--ndmacs) {
			ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
			continue;
		}

		vreq->v_status = VREQ_DMAWIN_DONE;
		vreq->v_blkno = blkno;
		if (vreq->v_dmaw + 1 == vreq->v_ndmaws)
			/* last win */
			rval = XF_COMP;
		else
			rval = XF_PARTIAL;
		break;
	}
	ddi_put8(acchdl, &rreq->nr_segments, seg);
	DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n",
	    rreq->id));

	return (rval);
}

#define	XDF_QSEC	50000 /* .05 second */
#define	XDF_POLLCNT	12 /* loop for 12 times before time out */

static int
xdf_drain_io(xdf_t *vdp)
{
	int pollc, rval;
	xendev_ring_t *xbr;

	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_drain_io: start\n");

	mutex_enter(&vdp->xdf_dev_lk);

	if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND))
		goto out;

	rval = 0;
	xbr = vdp->xdf_xb_ring;
	ASSERT(xbr != NULL);

	for (pollc = 0; pollc < XDF_POLLCNT; pollc++) {
		if (xvdi_ring_has_unconsumed_responses(xbr)) {
			mutex_exit(&vdp->xdf_dev_lk);
			(void) xdf_intr((caddr_t)vdp);
			mutex_enter(&vdp->xdf_dev_lk);
		}
		if (!xvdi_ring_has_incomp_request(xbr))
			goto out;

#ifndef XPV_HVM_DRIVER
		(void) HYPERVISOR_yield();
#endif /* XPV_HVM_DRIVER */
		/*
		 * file-backed devices can be slow
		 */
		drv_usecwait(XDF_QSEC << pollc);
	}
	cmn_err(CE_WARN, "xdf_polled_io: timeout");
	rval = EIO;
out:
	mutex_exit(&vdp->xdf_dev_lk);
	if (xdfdebug & SUSRES_DBG)
		xen_printf("xdf_drain_io: end, err=%d\n", rval);
	return (rval);
}

/* ARGSUSED5 */
int
xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp,
    diskaddr_t start, size_t reqlen, void *tg_cookie)
{
	xdf_t *vdp;
	struct buf *bp;
	int err = 0;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
	if (vdp == NULL)
		return (ENXIO);

	if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
		return (EINVAL);

	bp = getrbuf(KM_SLEEP);
	if (cmd == TG_READ)
		bp->b_flags = B_BUSY | B_READ;
	else
		bp->b_flags = B_BUSY | B_WRITE;
	bp->b_un.b_addr = bufp;
	bp->b_bcount = reqlen;
	bp->b_blkno = start;
	bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */

	mutex_enter(&vdp->xdf_dev_lk);
	if (vdp->xdf_xdev_iostat != NULL)
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	if (vdp->xdf_f_act == NULL) {
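		/* queue was empty; this buf becomes both head and tail */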
		vdp->xdf_f_act = vdp->xdf_l_act = bp;
	} else {
		vdp->xdf_l_act->av_forw = bp;
		vdp->xdf_l_act = bp;
	}
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_iostart(vdp);
	err = biowait(bp);

	ASSERT(bp->b_flags & B_DONE);

	freerbuf(bp);
	return (err);
}

/*
 * synthetic geometry
 */
#define	XDF_NSECTS	256
#define	XDF_NHEADS	16

static void
xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	xdf_t *vdp;
	uint_t ncyl;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));

	ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);

	geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
	geomp->g_acyl = 0;
	geomp->g_nhead = XDF_NHEADS;
	geomp->g_secsize = XB_BSIZE;
	geomp->g_nsect = XDF_NSECTS;
	geomp->g_intrlv = 0;
	geomp->g_rpm = 7200;
	geomp->g_capacity = vdp->xdf_xdev_nblocks;
}

static int
xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp)
{
	xdf_t *vdp;

	vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));

	if (vdp == NULL)
		return (ENXIO);

	mutex_enter(&vdp->xdf_dev_lk);
	*capp = vdp->xdf_pgeom.g_capacity;
	DPRINTF(LBL_DBG, ("capacity %llu\n", *capp));
	mutex_exit(&vdp->xdf_dev_lk);
	return (0);
}

static int
xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	xdf_t *vdp;

	if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL)
		return (ENXIO);
	*geomp = vdp->xdf_pgeom;
	return (0);
}

/*
 * No real HBA, no geometry available from it
 */
/*ARGSUSED*/
static int
xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp)
{
	return (EINVAL);
}

static int
xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep)
{
	xdf_t *vdp;

	if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))))
		return (ENXIO);

	if (XD_IS_RO(vdp))
		tgattributep->media_is_writable = 0;
	else
		tgattributep->media_is_writable = 1;
	return (0);
}

/* ARGSUSED3 */
int
xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
{
	switch (cmd) {
	case TG_GETPHYGEOM:
		return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg));
	case TG_GETVIRTGEOM:
		return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg));
	case TG_GETCAPACITY:
		return (xdf_lb_getcap(devi, (diskaddr_t *)arg));
	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = XB_BSIZE;
		return (0);
	case TG_GETATTR:
		return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg));
	default:
		return (ENOTTY);
	}
}

/*
 * Kick-off connect process
 * Status should be XD_UNKNOWN or XD_CLOSED
 * On success, status will be changed to XD_INIT
 * On error, status won't be changed
 */
static int
xdf_start_connect(xdf_t *vdp)
{
	char *xsnode;
	grant_ref_t gref;
	xenbus_transaction_t xbt;
	int rv;
	dev_info_t *dip = vdp->xdf_dip;

	if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1)
		goto errout;

	if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel",
		    ddi_get_name_addr(dip));
		goto errout;
	}
	vdp->xdf_evtchn = xvdi_get_evtchn(dip);
#ifdef XPV_HVM_DRIVER
	ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
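	/*
	 * In HVM mode the event channel is bound directly to xdf_intr at
	 * IPL_VBD; the PV case below registers the interrupt through the
	 * normal DDI path instead.
	 */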
#else /* !XPV_HVM_DRIVER */
	if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: "
		    "failed to add intr handler", ddi_get_name_addr(dip));
		goto errout1;
	}
#endif /* !XPV_HVM_DRIVER */

	if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
	    sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
	    DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
		    ddi_get_name_addr(dip));
		goto errout2;
	}
	vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */

	/*
	 * Write into xenstore the info needed by backend
	 */
	if ((xsnode = xvdi_get_xsname(dip)) == NULL) {
		cmn_err(CE_WARN, "xdf@%s: "
		    "failed to get xenstore node path",
		    ddi_get_name_addr(dip));
		goto fail_trans;
	}
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, EIO, "transaction start");
		goto fail_trans;
	}

	if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing ring-ref");
		goto abort_trans;
	}

	if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u",
	    vdp->xdf_evtchn)) {
		cmn_err(CE_WARN, "xdf@%s: failed to write event-channel",
		    ddi_get_name_addr(dip));
		xvdi_fatal_error(dip, rv, "writing event-channel");
		goto abort_trans;
	}

	/*
	 * "protocol" is written by the domain builder in the case of PV
	 * domains. However, it is not written for HVM domains, so let's
	 * write it here.
1758 */ 1759 if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", 1760 XEN_IO_PROTO_ABI_NATIVE)) { 1761 cmn_err(CE_WARN, "xdf@%s: failed to write protocol", 1762 ddi_get_name_addr(dip)); 1763 xvdi_fatal_error(dip, rv, "writing protocol"); 1764 goto abort_trans; 1765 } 1766 1767 if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { 1768 cmn_err(CE_WARN, "xdf@%s: " 1769 "failed to switch state to XenbusStateInitialised", 1770 ddi_get_name_addr(dip)); 1771 xvdi_fatal_error(dip, rv, "writing state"); 1772 goto abort_trans; 1773 } 1774 1775 /* kick-off connect process */ 1776 if (rv = xenbus_transaction_end(xbt, 0)) { 1777 if (rv == EAGAIN) 1778 goto trans_retry; 1779 cmn_err(CE_WARN, "xdf@%s: failed to end transaction", 1780 ddi_get_name_addr(dip)); 1781 xvdi_fatal_error(dip, rv, "completing transaction"); 1782 goto fail_trans; 1783 } 1784 1785 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 1786 mutex_enter(&vdp->xdf_dev_lk); 1787 vdp->xdf_status = XD_INIT; 1788 mutex_exit(&vdp->xdf_dev_lk); 1789 1790 return (DDI_SUCCESS); 1791 1792 abort_trans: 1793 (void) xenbus_transaction_end(xbt, 1); 1794 fail_trans: 1795 xvdi_free_ring(vdp->xdf_xb_ring); 1796 errout2: 1797 #ifdef XPV_HVM_DRIVER 1798 ec_unbind_evtchn(vdp->xdf_evtchn); 1799 #else /* !XPV_HVM_DRIVER */ 1800 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1801 #endif /* !XPV_HVM_DRIVER */ 1802 errout1: 1803 xvdi_free_evtchn(dip); 1804 errout: 1805 cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", 1806 ddi_get_name_addr(dip)); 1807 return (DDI_FAILURE); 1808 } 1809 1810 /* 1811 * Kick-off disconnect process 1812 * Status won't be changed 1813 */ 1814 static int 1815 xdf_start_disconnect(xdf_t *vdp) 1816 { 1817 if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { 1818 cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting", 1819 ddi_get_name_addr(vdp->xdf_dip)); 1820 return (DDI_FAILURE); 1821 } 1822 1823 return (DDI_SUCCESS); 1824 } 1825 1826 int 1827 xdf_get_flush_block(xdf_t *vdp) 1828 { 1829 /* 1830 * Get a DEV_BSIZE aligned bufer 1831 */ 1832 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1833 vdp->xdf_cache_flush_block = 1834 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1835 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1836 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1837 return (DDI_FAILURE); 1838 return (DDI_SUCCESS); 1839 } 1840 1841 /* 1842 * Finish other initialization after we've connected to backend 1843 * Status should be XD_INIT before calling this routine 1844 * On success, status should be changed to XD_READY 1845 * On error, status should stay XD_INIT 1846 */ 1847 static int 1848 xdf_post_connect(xdf_t *vdp) 1849 { 1850 int rv; 1851 uint_t len; 1852 char *type; 1853 char *barrier; 1854 dev_info_t *devi = vdp->xdf_dip; 1855 1856 /* 1857 * Determine if feature barrier is supported by backend 1858 */ 1859 if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), 1860 "feature-barrier", (void **)&barrier, &len) == 0) { 1861 vdp->xdf_feature_barrier = 1; 1862 kmem_free(barrier, len); 1863 } else { 1864 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1865 ddi_get_name_addr(vdp->xdf_dip)); 1866 vdp->xdf_feature_barrier = 0; 1867 } 1868 1869 /* probe backend */ 1870 if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), 1871 "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, 1872 "info", "%u", &vdp->xdf_xdev_info, NULL)) { 1873 cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " 1874 "cannot read backend info", ddi_get_name_addr(devi)); 1875 
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the device we're connecting isn't smaller than
	 * the old connected device.
	 */
	if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "backend disk device shrank", ddi_get_name_addr(devi));
		/* XXX: call xvdi_fatal_error() here? */
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}

#ifdef _ILP32
	if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "backend disk device too large with %llu blocks for"
		    " 32-bit kernel", ddi_get_name_addr(devi),
		    vdp->xdf_xdev_nblocks);
		xvdi_fatal_error(devi, rv, "reading backend info");
		return (DDI_FAILURE);
	}
#endif


	/*
	 * Only update the physical geometry to reflect the new device
	 * size if this is the first time we're connecting to the backend
	 * device.  Once we assign a physical geometry to a device it stays
	 * fixed until:
	 *	- we get detached and re-attached (at which point we
	 *	  automatically assign a new physical geometry).
	 *	- someone calls TG_SETPHYGEOM to explicitly set the
	 *	  physical geometry.
	 */
	if (vdp->xdf_pgeom.g_capacity == 0)
		xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);

	/* fix disk type */
	if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type",
	    (void **)&type, &len) != 0) {
		cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
		    "cannot read device-type", ddi_get_name_addr(devi));
		xvdi_fatal_error(devi, rv, "reading device-type");
		return (DDI_FAILURE);
	}
	if (strcmp(type, "cdrom") == 0)
		vdp->xdf_xdev_info |= VDISK_CDROM;
	kmem_free(type, len);

	/*
	 * We've created all the minor nodes via cmlb_attach() using default
	 * values in xdf_attach() to make it possible to block in xdf_open(),
	 * in case there's anyone (say, the booting thread) ever trying to open
	 * it before connected to backend. We will refresh all those minor
	 * nodes w/ the latest info we've got now that we are almost connected.
	 *
	 * Don't do this when xdf is already opened by someone (could happen
	 * during resume), because cmlb_attach() would invalidate the label
	 * info and confuse those who have already opened the node, which is
	 * bad.
	 */
	if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) {
		/* re-init cmlb w/ latest info we got from backend */
		if (cmlb_attach(devi, &xdf_lb_ops,
		    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
		    XD_IS_RM(vdp), 1,
		    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
		    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
		    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
		    CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
		    vdp->xdf_vd_lbl, NULL) != 0) {
			cmn_err(CE_WARN, "xdf@%s: cmlb attach failed",
			    ddi_get_name_addr(devi));
			return (DDI_FAILURE);
		}
	}

	/* mark vbd as ready for I/O */
	ASSERT(mutex_owned(&vdp->xdf_cb_lk));
	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_status = XD_READY;
	mutex_exit(&vdp->xdf_dev_lk);
	/*
	 * If backend has feature-barrier, see if it supports disk
	 * cache flush op.
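	 * The probe below issues a zero-length TG_WRITE; if the backend
	 * accepts BLKIF_OP_FLUSH_DISKCACHE we use that, otherwise we fall
	 * back to barrier-writing a cached block (see xdf_get_flush_block()).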
1966 */ 1967 vdp->xdf_flush_supported = 0; 1968 if (vdp->xdf_feature_barrier) { 1969 /* 1970 * Pretend we already know flush is supported so probe 1971 * will attempt the correct op. 1972 */ 1973 vdp->xdf_flush_supported = 1; 1974 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1975 vdp->xdf_flush_supported = 1; 1976 } else { 1977 vdp->xdf_flush_supported = 0; 1978 /* 1979 * If the other end does not support the cache flush op 1980 * then we must use a barrier-write to force disk 1981 * cache flushing. Barrier writes require that a data 1982 * block actually be written. 1983 * Cache a block to barrier-write when we are 1984 * asked to perform a flush. 1985 * XXX - would it be better to just copy 1 block 1986 * (512 bytes) from whatever write we did last 1987 * and rewrite that block? 1988 */ 1989 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) 1990 return (DDI_FAILURE); 1991 } 1992 } 1993 1994 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), 1995 (uint64_t)vdp->xdf_xdev_nblocks); 1996 1997 return (DDI_SUCCESS); 1998 } 1999 2000 /* 2001 * Finish other uninitialization after we've disconnected from backend 2002 * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED 2003 */ 2004 static void 2005 xdf_post_disconnect(xdf_t *vdp) 2006 { 2007 #ifdef XPV_HVM_DRIVER 2008 ec_unbind_evtchn(vdp->xdf_evtchn); 2009 #else /* !XPV_HVM_DRIVER */ 2010 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 2011 #endif /* !XPV_HVM_DRIVER */ 2012 xvdi_free_evtchn(vdp->xdf_dip); 2013 xvdi_free_ring(vdp->xdf_xb_ring); 2014 vdp->xdf_xb_ring = NULL; 2015 vdp->xdf_xb_ring_hdl = NULL; 2016 vdp->xdf_peer = (domid_t)-1; 2017 2018 ASSERT(mutex_owned(&vdp->xdf_cb_lk)); 2019 mutex_enter(&vdp->xdf_dev_lk); 2020 vdp->xdf_status = XD_CLOSED; 2021 mutex_exit(&vdp->xdf_dev_lk); 2022 } 2023 2024 /*ARGSUSED*/ 2025 static void 2026 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 2027 { 2028 XenbusState new_state = *(XenbusState *)impl_data; 2029 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2030 boolean_t unexpect_die = B_FALSE; 2031 int status; 2032 2033 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 2034 ddi_get_name_addr(dip), new_state)); 2035 2036 mutex_enter(&vdp->xdf_cb_lk); 2037 2038 if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { 2039 mutex_exit(&vdp->xdf_cb_lk); 2040 return; 2041 } 2042 2043 switch (new_state) { 2044 case XenbusStateInitialising: 2045 ASSERT(vdp->xdf_status == XD_CLOSED); 2046 /* 2047 * backend recovered from a previous failure, 2048 * kick-off connect process again 2049 */ 2050 if (xdf_start_connect(vdp) != DDI_SUCCESS) { 2051 cmn_err(CE_WARN, "xdf@%s:" 2052 " failed to start reconnecting to backend", 2053 ddi_get_name_addr(dip)); 2054 } 2055 break; 2056 case XenbusStateConnected: 2057 ASSERT(vdp->xdf_status == XD_INIT); 2058 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 2059 /* finish final init after connect */ 2060 if (xdf_post_connect(vdp) != DDI_SUCCESS) 2061 (void) xdf_start_disconnect(vdp); 2062 break; 2063 case XenbusStateClosing: 2064 mutex_enter(&vdp->xdf_dev_lk); 2065 if (xdf_isopen(vdp, -1)) { 2066 cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " 2067 "still in use", ddi_get_name_addr(dip)); 2068 } else { 2069 if ((vdp->xdf_status == XD_READY) || 2070 (vdp->xdf_status == XD_INIT)) 2071 vdp->xdf_status = XD_CLOSING; 2072 (void) xdf_start_disconnect(vdp); 2073 } 2074 mutex_exit(&vdp->xdf_dev_lk); 2075 break; 2076 case XenbusStateClosed: 2077 /* first check if BE 
closed unexpectedly */
		mutex_enter(&vdp->xdf_dev_lk);
		if (xdf_isopen(vdp, -1)) {
			unexpect_die = B_TRUE;
			unexpectedie(vdp);
			cmn_err(CE_WARN, "xdf@%s: backend closed, "
			    "reconnecting...", ddi_get_name_addr(dip));
		}
		mutex_exit(&vdp->xdf_dev_lk);

		if (vdp->xdf_status == XD_READY) {
			mutex_enter(&vdp->xdf_dev_lk);
			vdp->xdf_status = XD_CLOSING;
			mutex_exit(&vdp->xdf_dev_lk);

#ifdef DOMU_BACKEND
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
#endif

			xdf_post_disconnect(vdp);
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
		} else if ((vdp->xdf_status == XD_INIT) ||
		    (vdp->xdf_status == XD_CLOSING)) {
			xdf_post_disconnect(vdp);
		} else {
			mutex_enter(&vdp->xdf_dev_lk);
			vdp->xdf_status = XD_CLOSED;
			mutex_exit(&vdp->xdf_dev_lk);
		}
	}

	/* notify anybody waiting for oe state change */
	mutex_enter(&vdp->xdf_dev_lk);
	cv_broadcast(&vdp->xdf_dev_cv);
	mutex_exit(&vdp->xdf_dev_lk);

	status = vdp->xdf_status;
	mutex_exit(&vdp->xdf_cb_lk);

	if (status == XD_READY) {
		xdf_iostart(vdp);
	} else if ((status == XD_CLOSED) && !unexpect_die) {
		/* interface is closed successfully, remove all minor nodes */
		if (vdp->xdf_vd_lbl != NULL) {
			cmlb_detach(vdp->xdf_vd_lbl, NULL);
			cmlb_free_handle(&vdp->xdf_vd_lbl);
			vdp->xdf_vd_lbl = NULL;
		}
	}
}

/* check if a partition is open; partition == -1 checks all partitions on the disk */
static boolean_t
xdf_isopen(xdf_t *vdp, int partition)
{
	int i;
	ulong_t parbit;
	boolean_t rval = B_FALSE;

	ASSERT((partition == -1) ||
	    ((partition >= 0) && (partition < XDF_PEXT)));

	if (partition == -1)
		parbit = (ulong_t)-1;
	else
		parbit = 1 << partition;

	for (i = 0; i < OTYPCNT; i++) {
		if (vdp->xdf_vd_open[i] & parbit)
			rval = B_TRUE;
	}

	return (rval);
}

/*
 * Xdf_check_state_transition will check the XenbusState change to see
 * if the change is a valid transition or not.
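 * Transitions are judged against our current xdf_status: for example, the
 * backend jumping straight to XenbusStateConnected while we are still
 * XD_UNKNOWN is flagged as a probable bug, while a change that requires no
 * action on our part (such as seeing XenbusStateConnected again once we
 * are XD_READY) is treated as a no-op.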
2156 * The new state is written by backend domain, or by running xenstore-write 2157 * to change it manually in dom0 2158 */ 2159 static int 2160 xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) 2161 { 2162 int status; 2163 int stcheck; 2164 #define STOK 0 /* need further process */ 2165 #define STNOP 1 /* no action need taking */ 2166 #define STBUG 2 /* unexpected state change, could be a bug */ 2167 2168 status = vdp->xdf_status; 2169 stcheck = STOK; 2170 2171 switch (status) { 2172 case XD_UNKNOWN: 2173 if ((oestate == XenbusStateUnknown) || 2174 (oestate == XenbusStateConnected)) 2175 stcheck = STBUG; 2176 else if ((oestate == XenbusStateInitialising) || 2177 (oestate == XenbusStateInitWait) || 2178 (oestate == XenbusStateInitialised)) 2179 stcheck = STNOP; 2180 break; 2181 case XD_INIT: 2182 if (oestate == XenbusStateUnknown) 2183 stcheck = STBUG; 2184 else if ((oestate == XenbusStateInitialising) || 2185 (oestate == XenbusStateInitWait) || 2186 (oestate == XenbusStateInitialised)) 2187 stcheck = STNOP; 2188 break; 2189 case XD_READY: 2190 if ((oestate == XenbusStateUnknown) || 2191 (oestate == XenbusStateInitialising) || 2192 (oestate == XenbusStateInitWait) || 2193 (oestate == XenbusStateInitialised)) 2194 stcheck = STBUG; 2195 else if (oestate == XenbusStateConnected) 2196 stcheck = STNOP; 2197 break; 2198 case XD_CLOSING: 2199 if ((oestate == XenbusStateUnknown) || 2200 (oestate == XenbusStateInitialising) || 2201 (oestate == XenbusStateInitWait) || 2202 (oestate == XenbusStateInitialised) || 2203 (oestate == XenbusStateConnected)) 2204 stcheck = STBUG; 2205 else if (oestate == XenbusStateClosing) 2206 stcheck = STNOP; 2207 break; 2208 case XD_CLOSED: 2209 if ((oestate == XenbusStateUnknown) || 2210 (oestate == XenbusStateConnected)) 2211 stcheck = STBUG; 2212 else if ((oestate == XenbusStateInitWait) || 2213 (oestate == XenbusStateInitialised) || 2214 (oestate == XenbusStateClosing) || 2215 (oestate == XenbusStateClosed)) 2216 stcheck = STNOP; 2217 break; 2218 case XD_SUSPEND: 2219 default: 2220 stcheck = STBUG; 2221 } 2222 2223 if (stcheck == STOK) 2224 return (DDI_SUCCESS); 2225 2226 if (stcheck == STBUG) 2227 cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " 2228 "state change to %d!, when status is %d", 2229 ddi_get_name_addr(vdp->xdf_dip), oestate, status); 2230 2231 return (DDI_FAILURE); 2232 } 2233 2234 static int 2235 xdf_connect(xdf_t *vdp, boolean_t wait) 2236 { 2237 ASSERT(mutex_owned(&vdp->xdf_dev_lk)); 2238 while (vdp->xdf_status != XD_READY) { 2239 if (!wait || (vdp->xdf_status > XD_READY)) 2240 break; 2241 2242 if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0) 2243 break; 2244 } 2245 2246 return (vdp->xdf_status); 2247 } 2248 2249 /* 2250 * callback func when DMA/GTE resources is available 2251 * 2252 * Note: we only register one callback function to grant table subsystem 2253 * since we only have one 'struct gnttab_free_callback' in xdf_t. 
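 * The callback itself only triggers our soft interrupt; xdf_iorestart()
 * then clears the DMA-callback flag and calls xdf_iostart() to retry the
 * I/O that was waiting for resources.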
2254 */ 2255 static int 2256 xdf_dmacallback(caddr_t arg) 2257 { 2258 xdf_t *vdp = (xdf_t *)arg; 2259 ASSERT(vdp != NULL); 2260 2261 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 2262 ddi_get_name_addr(vdp->xdf_dip))); 2263 2264 ddi_trigger_softintr(vdp->xdf_softintr_id); 2265 return (DDI_DMA_CALLBACK_DONE); 2266 } 2267 2268 static uint_t 2269 xdf_iorestart(caddr_t arg) 2270 { 2271 xdf_t *vdp = (xdf_t *)arg; 2272 2273 ASSERT(vdp != NULL); 2274 2275 mutex_enter(&vdp->xdf_dev_lk); 2276 ASSERT(ISDMACBON(vdp)); 2277 SETDMACBOFF(vdp); 2278 mutex_exit(&vdp->xdf_dev_lk); 2279 2280 xdf_iostart(vdp); 2281 2282 return (DDI_INTR_CLAIMED); 2283 } 2284 2285 static void 2286 xdf_timeout_handler(void *arg) 2287 { 2288 xdf_t *vdp = arg; 2289 2290 mutex_enter(&vdp->xdf_dev_lk); 2291 vdp->xdf_timeout_id = 0; 2292 mutex_exit(&vdp->xdf_dev_lk); 2293 2294 /* new timeout thread could be re-scheduled */ 2295 xdf_iostart(vdp); 2296 } 2297 2298 /* 2299 * Alloc a vreq for this bp 2300 * bp->av_back contains the pointer to the vreq upon return 2301 */ 2302 static v_req_t * 2303 vreq_get(xdf_t *vdp, buf_t *bp) 2304 { 2305 v_req_t *vreq = NULL; 2306 2307 ASSERT(BP2VREQ(bp) == NULL); 2308 2309 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 2310 if (vreq == NULL) { 2311 if (vdp->xdf_timeout_id == 0) 2312 /* restart I/O after one second */ 2313 vdp->xdf_timeout_id = 2314 timeout(xdf_timeout_handler, vdp, hz); 2315 return (NULL); 2316 } 2317 bzero(vreq, sizeof (v_req_t)); 2318 2319 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 2320 bp->av_back = (buf_t *)vreq; 2321 vreq->v_buf = bp; 2322 vreq->v_status = VREQ_INIT; 2323 /* init of other fields in vreq is up to the caller */ 2324 2325 return (vreq); 2326 } 2327 2328 static void 2329 vreq_free(xdf_t *vdp, v_req_t *vreq) 2330 { 2331 buf_t *bp = vreq->v_buf; 2332 2333 list_remove(&vdp->xdf_vreq_act, (void *)vreq); 2334 2335 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 2336 goto done; 2337 2338 switch (vreq->v_status) { 2339 case VREQ_DMAWIN_DONE: 2340 case VREQ_GS_ALLOCED: 2341 case VREQ_DMABUF_BOUND: 2342 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 2343 /*FALLTHRU*/ 2344 case VREQ_DMAMEM_ALLOCED: 2345 if (!ALIGNED_XFER(bp)) { 2346 ASSERT(vreq->v_abuf != NULL); 2347 if (!IS_ERROR(bp) && IS_READ(bp)) 2348 bcopy(vreq->v_abuf, bp->b_un.b_addr, 2349 bp->b_bcount); 2350 ddi_dma_mem_free(&vreq->v_align); 2351 } 2352 /*FALLTHRU*/ 2353 case VREQ_MEMDMAHDL_ALLOCED: 2354 if (!ALIGNED_XFER(bp)) 2355 ddi_dma_free_handle(&vreq->v_memdmahdl); 2356 /*FALLTHRU*/ 2357 case VREQ_DMAHDL_ALLOCED: 2358 ddi_dma_free_handle(&vreq->v_dmahdl); 2359 break; 2360 default: 2361 break; 2362 } 2363 done: 2364 vreq->v_buf->av_back = NULL; 2365 kmem_cache_free(xdf_vreq_cache, vreq); 2366 } 2367 2368 /* 2369 * Initalize the DMA and grant table resources for the buf 2370 */ 2371 static int 2372 vreq_setup(xdf_t *vdp, v_req_t *vreq) 2373 { 2374 int rc; 2375 ddi_dma_attr_t dmaattr; 2376 uint_t ndcs, ndws; 2377 ddi_dma_handle_t dh; 2378 ddi_dma_handle_t mdh; 2379 ddi_dma_cookie_t dc; 2380 ddi_acc_handle_t abh; 2381 caddr_t aba; 2382 ge_slot_t *gs; 2383 size_t bufsz; 2384 off_t off; 2385 size_t sz; 2386 buf_t *bp = vreq->v_buf; 2387 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 2388 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 2389 2390 switch (vreq->v_status) { 2391 case VREQ_INIT: 2392 if (IS_FLUSH_DISKCACHE(bp)) { 2393 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 2394 DPRINTF(DMA_DBG, ( 2395 "xdf@%s: get ge_slotfailed\n", 2396 ddi_get_name_addr(vdp->xdf_dip))); 2397 return (DDI_FAILURE); 2398 } 2399 vreq->v_blkno = 0; 2400 vreq->v_nslots = 1; 2401 vreq->v_gs = gs; 2402 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 2403 vreq->v_status = VREQ_GS_ALLOCED; 2404 gs->vreq = vreq; 2405 return (DDI_SUCCESS); 2406 } 2407 2408 if (IS_WRITE_BARRIER(vdp, bp)) 2409 vreq->v_flush_diskcache = WRITE_BARRIER; 2410 vreq->v_blkno = bp->b_blkno + 2411 (diskaddr_t)(uintptr_t)bp->b_private; 2412 bp->b_private = NULL; 2413 /* See if we wrote new data to our flush block */ 2414 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 2415 check_fbwrite(vdp, bp, vreq->v_blkno); 2416 vreq->v_status = VREQ_INIT_DONE; 2417 /*FALLTHRU*/ 2418 2419 case VREQ_INIT_DONE: 2420 /* 2421 * alloc DMA handle 2422 */ 2423 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 2424 xdf_dmacallback, (caddr_t)vdp, &dh); 2425 if (rc != DDI_SUCCESS) { 2426 SETDMACBON(vdp); 2427 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 2428 ddi_get_name_addr(vdp->xdf_dip))); 2429 return (DDI_FAILURE); 2430 } 2431 2432 vreq->v_dmahdl = dh; 2433 vreq->v_status = VREQ_DMAHDL_ALLOCED; 2434 /*FALLTHRU*/ 2435 2436 case VREQ_DMAHDL_ALLOCED: 2437 /* 2438 * alloc dma handle for 512-byte aligned buf 2439 */ 2440 if (!ALIGNED_XFER(bp)) { 2441 /* 2442 * XXPV: we need to temporarily enlarge the seg 2443 * boundary and s/g length to work round CR6381968 2444 */ 2445 dmaattr = xb_dma_attr; 2446 dmaattr.dma_attr_seg = (uint64_t)-1; 2447 dmaattr.dma_attr_sgllen = INT_MAX; 2448 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 2449 xdf_dmacallback, (caddr_t)vdp, &mdh); 2450 if (rc != DDI_SUCCESS) { 2451 SETDMACBON(vdp); 2452 DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA" 2453 "handle alloc failed\n", 2454 ddi_get_name_addr(vdp->xdf_dip))); 2455 return (DDI_FAILURE); 2456 } 2457 vreq->v_memdmahdl = mdh; 2458 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED; 2459 } 2460 /*FALLTHRU*/ 2461 2462 case VREQ_MEMDMAHDL_ALLOCED: 2463 /* 2464 * alloc 512-byte aligned buf 2465 */ 2466 if (!ALIGNED_XFER(bp)) { 2467 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 2468 bp_mapin(bp); 2469 2470 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 2471 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 2472 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 2473 &aba, &bufsz, &abh); 2474 if (rc != DDI_SUCCESS) { 2475 SETDMACBON(vdp); 2476 DPRINTF(DMA_DBG, ( 2477 "xdf@%s: DMA mem allocation failed\n", 2478 ddi_get_name_addr(vdp->xdf_dip))); 2479 return (DDI_FAILURE); 2480 } 2481 2482 vreq->v_abuf = aba; 2483 vreq->v_align = abh; 2484 vreq->v_status = VREQ_DMAMEM_ALLOCED; 2485 2486 ASSERT(bufsz >= bp->b_bcount); 2487 if (!IS_READ(bp)) 2488 bcopy(bp->b_un.b_addr, vreq->v_abuf, 2489 bp->b_bcount); 2490 } 2491 /*FALLTHRU*/ 2492 2493 case VREQ_DMAMEM_ALLOCED: 2494 /* 2495 * dma bind 2496 */ 2497 if (ALIGNED_XFER(bp)) { 2498 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 2499 dma_flags, xdf_dmacallback, (caddr_t)vdp, 2500 &dc, &ndcs); 2501 } else { 2502 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 2503 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 2504 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 2505 } 2506 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 2507 /* get num of dma windows */ 2508 if (rc == DDI_DMA_PARTIAL_MAP) { 2509 rc = ddi_dma_numwin(vreq->v_dmahdl, 
&ndws); 2510 ASSERT(rc == DDI_SUCCESS); 2511 } else { 2512 ndws = 1; 2513 } 2514 } else { 2515 SETDMACBON(vdp); 2516 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 2517 ddi_get_name_addr(vdp->xdf_dip))); 2518 return (DDI_FAILURE); 2519 } 2520 2521 vreq->v_dmac = dc; 2522 vreq->v_dmaw = 0; 2523 vreq->v_ndmacs = ndcs; 2524 vreq->v_ndmaws = ndws; 2525 vreq->v_nslots = ndws; 2526 vreq->v_status = VREQ_DMABUF_BOUND; 2527 /*FALLTHRU*/ 2528 2529 case VREQ_DMABUF_BOUND: 2530 /* 2531 * get ge_slot, callback is set upon failure from gs_get(), 2532 * if not set previously 2533 */ 2534 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 2535 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 2536 ddi_get_name_addr(vdp->xdf_dip))); 2537 return (DDI_FAILURE); 2538 } 2539 2540 vreq->v_gs = gs; 2541 gs->vreq = vreq; 2542 vreq->v_status = VREQ_GS_ALLOCED; 2543 break; 2544 2545 case VREQ_GS_ALLOCED: 2546 /* nothing need to be done */ 2547 break; 2548 2549 case VREQ_DMAWIN_DONE: 2550 /* 2551 * move to the next dma window 2552 */ 2553 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 2554 2555 /* get a ge_slot for this DMA window */ 2556 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 2557 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 2558 ddi_get_name_addr(vdp->xdf_dip))); 2559 return (DDI_FAILURE); 2560 } 2561 2562 vreq->v_gs = gs; 2563 gs->vreq = vreq; 2564 vreq->v_dmaw++; 2565 rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 2566 &vreq->v_dmac, &vreq->v_ndmacs); 2567 ASSERT(rc == DDI_SUCCESS); 2568 vreq->v_status = VREQ_GS_ALLOCED; 2569 break; 2570 2571 default: 2572 return (DDI_FAILURE); 2573 } 2574 2575 return (DDI_SUCCESS); 2576 } 2577 2578 static ge_slot_t * 2579 gs_get(xdf_t *vdp, int isread) 2580 { 2581 grant_ref_t gh; 2582 ge_slot_t *gs; 2583 2584 /* try to alloc GTEs needed in this slot, first */ 2585 if (gnttab_alloc_grant_references( 2586 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 2587 if (vdp->xdf_gnt_callback.next == NULL) { 2588 SETDMACBON(vdp); 2589 gnttab_request_free_callback( 2590 &vdp->xdf_gnt_callback, 2591 (void (*)(void *))xdf_dmacallback, 2592 (void *)vdp, 2593 BLKIF_MAX_SEGMENTS_PER_REQUEST); 2594 } 2595 return (NULL); 2596 } 2597 2598 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 2599 if (gs == NULL) { 2600 gnttab_free_grant_references(gh); 2601 if (vdp->xdf_timeout_id == 0) 2602 /* restart I/O after one second */ 2603 vdp->xdf_timeout_id = 2604 timeout(xdf_timeout_handler, vdp, hz); 2605 return (NULL); 2606 } 2607 2608 /* init gs_slot */ 2609 list_insert_head(&vdp->xdf_gs_act, (void *)gs); 2610 gs->oeid = vdp->xdf_peer; 2611 gs->isread = isread; 2612 gs->ghead = gh; 2613 gs->ngrefs = 0; 2614 2615 return (gs); 2616 } 2617 2618 static void 2619 gs_free(xdf_t *vdp, ge_slot_t *gs) 2620 { 2621 int i; 2622 grant_ref_t *gp = gs->ge; 2623 int ngrefs = gs->ngrefs; 2624 boolean_t isread = gs->isread; 2625 2626 list_remove(&vdp->xdf_gs_act, (void *)gs); 2627 2628 /* release all grant table entry resources used in this slot */ 2629 for (i = 0; i < ngrefs; i++, gp++) 2630 gnttab_end_foreign_access(*gp, !isread, 0); 2631 gnttab_free_grant_references(gs->ghead); 2632 2633 kmem_cache_free(xdf_gs_cache, (void *)gs); 2634 } 2635 2636 static grant_ref_t 2637 gs_grant(ge_slot_t *gs, mfn_t mfn) 2638 { 2639 grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead); 2640 2641 ASSERT(gr != -1); 2642 ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); 2643 gs->ge[gs->ngrefs++] = gr; 2644 gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread); 2645 2646 return (gr); 2647 } 2648 2649 
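/*
 * unexpectedie() cleans up after the backend has died unexpectedly:
 * consume any responses already on the ring, release all grant table
 * entries, and move the in-flight bufs back onto the active list so that
 * they can be resubmitted once we reconnect.  Called with xdf_dev_lk held.
 */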
static void
unexpectedie(xdf_t *vdp)
{
	/* clean up I/Os in ring that have responses */
	if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		mutex_exit(&vdp->xdf_dev_lk);
		(void) xdf_intr((caddr_t)vdp);
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/* free up all grant table entries */
	while (!list_is_empty(&vdp->xdf_gs_act))
		gs_free(vdp, list_head(&vdp->xdf_gs_act));

	/*
	 * move the bufs back to the active list in order;
	 * vreq_busy is updated in vreq_free()
	 */
	while (!list_is_empty(&vdp->xdf_vreq_act)) {
		v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
		buf_t *bp = vreq->v_buf;

		bp->av_back = NULL;
		bp->b_resid = bp->b_bcount;
		if (vdp->xdf_f_act == NULL) {
			vdp->xdf_f_act = vdp->xdf_l_act = bp;
		} else {
			/* move to the head of list */
			bp->av_forw = vdp->xdf_f_act;
			vdp->xdf_f_act = bp;
		}
		if (vdp->xdf_xdev_iostat != NULL)
			kstat_runq_back_to_waitq(
			    KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
		vreq_free(vdp, vreq);
	}
}

static void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}

void
xdf_kstat_delete(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t	*kstat;

	/*
	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of our kstat.  xdf_iostat_lk is used
	 * to protect the allocation and freeing of the actual kstat.
	 * xdf_dev_lk can't be used for this purpose because kstat
	 * readers use it to access the contents of the kstat and
	 * hence it can't be held when calling kstat_delete().
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return;
	}

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);

	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}

int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);

	/* See comment about locking in xdf_kstat_delete(). */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat != NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	if ((vdp->xdf_xdev_iostat = kstat_create(
	    ks_module, ks_instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return (-1);
	}

	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
	kstat_install(vdp->xdf_xdev_iostat);
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_iostat_lk);

	return (0);
}

#if defined(XPV_HVM_DRIVER)

typedef struct xdf_hvm_entry {
	list_node_t	xdf_he_list;
	char		*xdf_he_path;
	dev_info_t	*xdf_he_dip;
} xdf_hvm_entry_t;

static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;

static xdf_hvm_entry_t *
i_xdf_hvm_find(char *path, dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	ASSERT((path != NULL) || (dip != NULL));
	ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));

	i = list_head(&xdf_hvm_list);
	while (i != NULL) {
		if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		if ((dip != NULL) && (i->xdf_he_dip != dip)) {
			i = list_next(&xdf_hvm_list, i);
			continue;
		}
		break;
	}
	return (i);
}

dev_info_t *
xdf_hvm_hold(char *path)
{
	xdf_hvm_entry_t	*i;
	dev_info_t	*dip;

	mutex_enter(&xdf_hvm_list_lock);
	i = i_xdf_hvm_find(path, NULL);
	if (i == NULL) {
		mutex_exit(&xdf_hvm_list_lock);
		return (NULL);
	}
	ndi_hold_devi(dip = i->xdf_he_dip);
	mutex_exit(&xdf_hvm_list_lock);
	return (dip);
}

static void
xdf_hvm_add(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;
	char		*path;

	/* figure out the path for the dip */
	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	i = kmem_alloc(sizeof (*i), KM_SLEEP);
	i->xdf_he_dip = dip;
	i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);

	mutex_enter(&xdf_hvm_list_lock);
	ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
	ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
	list_insert_head(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	mutex_enter(&xdf_hvm_list_lock);
	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
	list_remove(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
	kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
	    offsetof(xdf_hvm_entry_t, xdf_he_list));
	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
	ASSERT(list_head(&xdf_hvm_list) == NULL);
	list_destroy(&xdf_hvm_list);
	mutex_destroy(&xdf_hvm_list_lock);
}

int
xdf_hvm_connect(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	int	rv;

	/* do cv_wait until connected or failed */
	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_connect(vdp, B_TRUE);
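	/*
	 * xdf_connect() returns the current connection status; anything
	 * other than XD_READY means we failed to connect to the backend.
	 */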
mutex_exit(&vdp->xdf_dev_lk); 2871 return ((rv == XD_READY) ? 0 : -1); 2872 } 2873 2874 int 2875 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2876 { 2877 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2878 2879 /* sanity check the requested physical geometry */ 2880 mutex_enter(&vdp->xdf_dev_lk); 2881 if ((geomp->g_secsize != XB_BSIZE) || 2882 (geomp->g_capacity == 0)) { 2883 mutex_exit(&vdp->xdf_dev_lk); 2884 return (EINVAL); 2885 } 2886 2887 /* 2888 * If we've already connected to the backend device then make sure 2889 * we're not defining a physical geometry larger than our backend 2890 * device. 2891 */ 2892 if ((vdp->xdf_xdev_nblocks != 0) && 2893 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2894 mutex_exit(&vdp->xdf_dev_lk); 2895 return (EINVAL); 2896 } 2897 2898 vdp->xdf_pgeom = *geomp; 2899 mutex_exit(&vdp->xdf_dev_lk); 2900 2901 /* force a re-validation */ 2902 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2903 2904 return (0); 2905 } 2906 2907 #endif /* XPV_HVM_DRIVER */ 2908
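/*
 * Illustrative sketch only (not compiled into the driver): a PV-on-HVM
 * disk driver built on top of the interfaces above would typically locate
 * and connect to its xdf instance along these lines.  The device path
 * below is hypothetical, and pairing xdf_hvm_hold() with ndi_rele_devi()
 * is an assumption based on the ndi_hold_devi() call in xdf_hvm_hold().
 *
 *	dev_info_t *dip;
 *
 *	if ((dip = xdf_hvm_hold("/xpvd/xdf@768")) == NULL)
 *		return (ENODEV);
 *	if (xdf_hvm_connect(dip) != 0) {
 *		ndi_rele_devi(dip);
 *		return (EIO);
 *	}
 *	(issue I/O against the held instance, then drop the hold with
 *	ndi_rele_devi() when finished)
 */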