/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports Solaris disk device nodes, accepts IO requests from
 * those nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device. This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend). Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state. But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported. Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback. This callback
 * is single threaded, meaning that we can't receive new notification of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change. Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback. This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state. All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface. IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property. For more information on this see the
 * comments in blkif.h.
68 */ 69 70 #include <io/xdf.h> 71 72 #include <sys/conf.h> 73 #include <sys/dkio.h> 74 #include <sys/promif.h> 75 #include <sys/sysmacros.h> 76 #include <sys/kstat.h> 77 #include <sys/mach_mmu.h> 78 #ifdef XPV_HVM_DRIVER 79 #include <sys/xpv_support.h> 80 #include <sys/sunndi.h> 81 #else /* !XPV_HVM_DRIVER */ 82 #include <sys/evtchn_impl.h> 83 #endif /* !XPV_HVM_DRIVER */ 84 #include <public/io/xenbus.h> 85 #include <xen/sys/xenbus_impl.h> 86 #include <sys/scsi/generic/inquiry.h> 87 #include <xen/io/blkif_impl.h> 88 #include <sys/fdio.h> 89 #include <sys/cdio.h> 90 91 /* 92 * DEBUG_EVAL can be used to include debug only statements without 93 * having to use '#ifdef DEBUG' statements 94 */ 95 #ifdef DEBUG 96 #define DEBUG_EVAL(x) (x) 97 #else /* !DEBUG */ 98 #define DEBUG_EVAL(x) 99 #endif /* !DEBUG */ 100 101 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ 102 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ 103 104 #define INVALID_DOMID ((domid_t)-1) 105 #define FLUSH_DISKCACHE 0x1 106 #define WRITE_BARRIER 0x2 107 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 108 #define USE_WRITE_BARRIER(vdp) \ 109 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 110 #define USE_FLUSH_DISKCACHE(vdp) \ 111 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 112 #define IS_WRITE_BARRIER(vdp, bp) \ 113 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 114 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 115 #define IS_FLUSH_DISKCACHE(bp) \ 116 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 117 118 #define VREQ_DONE(vreq) \ 119 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ 120 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ 121 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) 122 123 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) 124 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) 125 126 extern int do_polled_io; 127 128 /* run-time tunables that we don't want the compiler to 
optimize away */ 129 volatile int xdf_debug = 0; 130 volatile boolean_t xdf_barrier_flush_disable = B_FALSE; 131 132 /* per module globals */ 133 major_t xdf_major; 134 static void *xdf_ssp; 135 static kmem_cache_t *xdf_vreq_cache; 136 static kmem_cache_t *xdf_gs_cache; 137 static int xdf_maxphys = XB_MAXPHYS; 138 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 139 static int xdf_fbrewrites; /* flush block re-write count */ 140 141 /* misc public functions (used by xdf_shell.c) */ 142 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 143 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 144 145 /* misc private functions */ 146 static void xdf_io_start(xdf_t *); 147 148 /* callbacks from commmon label */ 149 static cmlb_tg_ops_t xdf_lb_ops = { 150 TG_DK_OPS_VERSION_1, 151 xdf_lb_rdwr, 152 xdf_lb_getinfo 153 }; 154 155 /* 156 * I/O buffer DMA attributes 157 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 158 */ 159 static ddi_dma_attr_t xb_dma_attr = { 160 DMA_ATTR_V0, 161 (uint64_t)0, /* lowest address */ 162 (uint64_t)0xffffffffffffffff, /* highest usable address */ 163 (uint64_t)0xffffff, /* DMA counter limit max */ 164 (uint64_t)XB_BSIZE, /* alignment in bytes */ 165 XB_BSIZE - 1, /* bitmap of burst sizes */ 166 XB_BSIZE, /* min transfer */ 167 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 168 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 169 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 170 XB_BSIZE, /* granularity */ 171 0, /* flags (reserved) */ 172 }; 173 174 static ddi_device_acc_attr_t xc_acc_attr = { 175 DDI_DEVICE_ATTR_V0, 176 DDI_NEVERSWAP_ACC, 177 DDI_STRICTORDER_ACC 178 }; 179 180 static void 181 xdf_timeout_handler(void *arg) 182 { 183 xdf_t *vdp = arg; 184 185 mutex_enter(&vdp->xdf_dev_lk); 186 vdp->xdf_timeout_id = 0; 187 mutex_exit(&vdp->xdf_dev_lk); 188 189 /* new timeout thread could be re-scheduled */ 190 xdf_io_start(vdp); 191 } 192 193 /* 194 * callback 
func when DMA/GTE resources is available 195 * 196 * Note: we only register one callback function to grant table subsystem 197 * since we only have one 'struct gnttab_free_callback' in xdf_t. 198 */ 199 static int 200 xdf_dmacallback(caddr_t arg) 201 { 202 xdf_t *vdp = (xdf_t *)arg; 203 ASSERT(vdp != NULL); 204 205 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 206 vdp->xdf_addr)); 207 208 ddi_trigger_softintr(vdp->xdf_softintr_id); 209 return (DDI_DMA_CALLBACK_DONE); 210 } 211 212 static ge_slot_t * 213 gs_get(xdf_t *vdp, int isread) 214 { 215 grant_ref_t gh; 216 ge_slot_t *gs; 217 218 /* try to alloc GTEs needed in this slot, first */ 219 if (gnttab_alloc_grant_references( 220 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 221 if (vdp->xdf_gnt_callback.next == NULL) { 222 SETDMACBON(vdp); 223 gnttab_request_free_callback( 224 &vdp->xdf_gnt_callback, 225 (void (*)(void *))xdf_dmacallback, 226 (void *)vdp, 227 BLKIF_MAX_SEGMENTS_PER_REQUEST); 228 } 229 return (NULL); 230 } 231 232 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 233 if (gs == NULL) { 234 gnttab_free_grant_references(gh); 235 if (vdp->xdf_timeout_id == 0) 236 /* restart I/O after one second */ 237 vdp->xdf_timeout_id = 238 timeout(xdf_timeout_handler, vdp, hz); 239 return (NULL); 240 } 241 242 /* init gs_slot */ 243 gs->gs_oeid = vdp->xdf_peer; 244 gs->gs_isread = isread; 245 gs->gs_ghead = gh; 246 gs->gs_ngrefs = 0; 247 248 return (gs); 249 } 250 251 static void 252 gs_free(ge_slot_t *gs) 253 { 254 int i; 255 256 /* release all grant table entry resources used in this slot */ 257 for (i = 0; i < gs->gs_ngrefs; i++) 258 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 259 gnttab_free_grant_references(gs->gs_ghead); 260 list_remove(&gs->gs_vreq->v_gs, gs); 261 kmem_cache_free(xdf_gs_cache, gs); 262 } 263 264 static grant_ref_t 265 gs_grant(ge_slot_t *gs, mfn_t mfn) 266 { 267 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); 268 269 ASSERT(gr != -1); 270 ASSERT(gs->gs_ngrefs 
< BLKIF_MAX_SEGMENTS_PER_REQUEST); 271 gs->gs_ge[gs->gs_ngrefs++] = gr; 272 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 273 274 return (gr); 275 } 276 277 /* 278 * Alloc a vreq for this bp 279 * bp->av_back contains the pointer to the vreq upon return 280 */ 281 static v_req_t * 282 vreq_get(xdf_t *vdp, buf_t *bp) 283 { 284 v_req_t *vreq = NULL; 285 286 ASSERT(BP_VREQ(bp) == NULL); 287 288 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 289 if (vreq == NULL) { 290 if (vdp->xdf_timeout_id == 0) 291 /* restart I/O after one second */ 292 vdp->xdf_timeout_id = 293 timeout(xdf_timeout_handler, vdp, hz); 294 return (NULL); 295 } 296 bzero(vreq, sizeof (v_req_t)); 297 list_create(&vreq->v_gs, sizeof (ge_slot_t), 298 offsetof(ge_slot_t, gs_vreq_link)); 299 vreq->v_buf = bp; 300 vreq->v_status = VREQ_INIT; 301 vreq->v_runq = B_FALSE; 302 BP_VREQ_SET(bp, vreq); 303 /* init of other fields in vreq is up to the caller */ 304 305 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 306 307 return (vreq); 308 } 309 310 static void 311 vreq_free(xdf_t *vdp, v_req_t *vreq) 312 { 313 buf_t *bp = vreq->v_buf; 314 315 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 316 ASSERT(BP_VREQ(bp) == vreq); 317 318 list_remove(&vdp->xdf_vreq_act, vreq); 319 320 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 321 goto done; 322 323 switch (vreq->v_status) { 324 case VREQ_DMAWIN_DONE: 325 case VREQ_GS_ALLOCED: 326 case VREQ_DMABUF_BOUND: 327 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 328 /*FALLTHRU*/ 329 case VREQ_DMAMEM_ALLOCED: 330 if (!ALIGNED_XFER(bp)) { 331 ASSERT(vreq->v_abuf != NULL); 332 if (!IS_ERROR(bp) && IS_READ(bp)) 333 bcopy(vreq->v_abuf, bp->b_un.b_addr, 334 bp->b_bcount); 335 ddi_dma_mem_free(&vreq->v_align); 336 } 337 /*FALLTHRU*/ 338 case VREQ_MEMDMAHDL_ALLOCED: 339 if (!ALIGNED_XFER(bp)) 340 ddi_dma_free_handle(&vreq->v_memdmahdl); 341 /*FALLTHRU*/ 342 case VREQ_DMAHDL_ALLOCED: 343 ddi_dma_free_handle(&vreq->v_dmahdl); 344 break; 345 default: 346 
break; 347 } 348 done: 349 ASSERT(!vreq->v_runq); 350 list_destroy(&vreq->v_gs); 351 kmem_cache_free(xdf_vreq_cache, vreq); 352 } 353 354 /* 355 * Snarf new data if our flush block was re-written 356 */ 357 static void 358 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 359 { 360 int nblks; 361 boolean_t mapin; 362 363 if (IS_WRITE_BARRIER(vdp, bp)) 364 return; /* write was a flush write */ 365 366 mapin = B_FALSE; 367 nblks = bp->b_bcount >> DEV_BSHIFT; 368 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 369 xdf_fbrewrites++; 370 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 371 mapin = B_TRUE; 372 bp_mapin(bp); 373 } 374 bcopy(bp->b_un.b_addr + 375 ((xdf_flush_block - blkno) << DEV_BSHIFT), 376 vdp->xdf_cache_flush_block, DEV_BSIZE); 377 if (mapin) 378 bp_mapout(bp); 379 } 380 } 381 382 /* 383 * Initalize the DMA and grant table resources for the buf 384 */ 385 static int 386 vreq_setup(xdf_t *vdp, v_req_t *vreq) 387 { 388 int rc; 389 ddi_dma_attr_t dmaattr; 390 uint_t ndcs, ndws; 391 ddi_dma_handle_t dh; 392 ddi_dma_handle_t mdh; 393 ddi_dma_cookie_t dc; 394 ddi_acc_handle_t abh; 395 caddr_t aba; 396 ge_slot_t *gs; 397 size_t bufsz; 398 off_t off; 399 size_t sz; 400 buf_t *bp = vreq->v_buf; 401 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 402 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 403 404 switch (vreq->v_status) { 405 case VREQ_INIT: 406 if (IS_FLUSH_DISKCACHE(bp)) { 407 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 408 DPRINTF(DMA_DBG, ("xdf@%s: " 409 "get ge_slotfailed\n", vdp->xdf_addr)); 410 return (DDI_FAILURE); 411 } 412 vreq->v_blkno = 0; 413 vreq->v_nslots = 1; 414 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 415 vreq->v_status = VREQ_GS_ALLOCED; 416 gs->gs_vreq = vreq; 417 list_insert_head(&vreq->v_gs, gs); 418 return (DDI_SUCCESS); 419 } 420 421 if (IS_WRITE_BARRIER(vdp, bp)) 422 vreq->v_flush_diskcache = WRITE_BARRIER; 423 vreq->v_blkno = bp->b_blkno + 424 (diskaddr_t)(uintptr_t)bp->b_private; 425 /* See if we wrote new data to our flush block */ 426 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 427 check_fbwrite(vdp, bp, vreq->v_blkno); 428 vreq->v_status = VREQ_INIT_DONE; 429 /*FALLTHRU*/ 430 431 case VREQ_INIT_DONE: 432 /* 433 * alloc DMA handle 434 */ 435 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 436 xdf_dmacallback, (caddr_t)vdp, &dh); 437 if (rc != DDI_SUCCESS) { 438 SETDMACBON(vdp); 439 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 440 vdp->xdf_addr)); 441 return (DDI_FAILURE); 442 } 443 444 vreq->v_dmahdl = dh; 445 vreq->v_status = VREQ_DMAHDL_ALLOCED; 446 /*FALLTHRU*/ 447 448 case VREQ_DMAHDL_ALLOCED: 449 /* 450 * alloc dma handle for 512-byte aligned buf 451 */ 452 if (!ALIGNED_XFER(bp)) { 453 /* 454 * XXPV: we need to temporarily enlarge the seg 455 * boundary and s/g length to work round CR6381968 456 */ 457 dmaattr = xb_dma_attr; 458 dmaattr.dma_attr_seg = (uint64_t)-1; 459 dmaattr.dma_attr_sgllen = INT_MAX; 460 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 461 xdf_dmacallback, (caddr_t)vdp, &mdh); 462 if (rc != DDI_SUCCESS) { 463 SETDMACBON(vdp); 464 DPRINTF(DMA_DBG, ("xdf@%s: " 465 "unaligned buf DMAhandle alloc failed\n", 466 vdp->xdf_addr)); 467 return (DDI_FAILURE); 468 } 469 vreq->v_memdmahdl = mdh; 470 vreq->v_status 
= VREQ_MEMDMAHDL_ALLOCED; 471 } 472 /*FALLTHRU*/ 473 474 case VREQ_MEMDMAHDL_ALLOCED: 475 /* 476 * alloc 512-byte aligned buf 477 */ 478 if (!ALIGNED_XFER(bp)) { 479 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 480 bp_mapin(bp); 481 482 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 483 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 484 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 485 &aba, &bufsz, &abh); 486 if (rc != DDI_SUCCESS) { 487 SETDMACBON(vdp); 488 DPRINTF(DMA_DBG, ("xdf@%s: " 489 "DMA mem allocation failed\n", 490 vdp->xdf_addr)); 491 return (DDI_FAILURE); 492 } 493 494 vreq->v_abuf = aba; 495 vreq->v_align = abh; 496 vreq->v_status = VREQ_DMAMEM_ALLOCED; 497 498 ASSERT(bufsz >= bp->b_bcount); 499 if (!IS_READ(bp)) 500 bcopy(bp->b_un.b_addr, vreq->v_abuf, 501 bp->b_bcount); 502 } 503 /*FALLTHRU*/ 504 505 case VREQ_DMAMEM_ALLOCED: 506 /* 507 * dma bind 508 */ 509 if (ALIGNED_XFER(bp)) { 510 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 511 dma_flags, xdf_dmacallback, (caddr_t)vdp, 512 &dc, &ndcs); 513 } else { 514 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 515 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 516 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 517 } 518 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 519 /* get num of dma windows */ 520 if (rc == DDI_DMA_PARTIAL_MAP) { 521 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 522 ASSERT(rc == DDI_SUCCESS); 523 } else { 524 ndws = 1; 525 } 526 } else { 527 SETDMACBON(vdp); 528 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 529 vdp->xdf_addr)); 530 return (DDI_FAILURE); 531 } 532 533 vreq->v_dmac = dc; 534 vreq->v_dmaw = 0; 535 vreq->v_ndmacs = ndcs; 536 vreq->v_ndmaws = ndws; 537 vreq->v_nslots = ndws; 538 vreq->v_status = VREQ_DMABUF_BOUND; 539 /*FALLTHRU*/ 540 541 case VREQ_DMABUF_BOUND: 542 /* 543 * get ge_slot, callback is set upon failure from gs_get(), 544 * if not set previously 545 */ 546 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 547 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 548 
vdp->xdf_addr)); 549 return (DDI_FAILURE); 550 } 551 552 vreq->v_status = VREQ_GS_ALLOCED; 553 gs->gs_vreq = vreq; 554 list_insert_head(&vreq->v_gs, gs); 555 break; 556 557 case VREQ_GS_ALLOCED: 558 /* nothing need to be done */ 559 break; 560 561 case VREQ_DMAWIN_DONE: 562 /* 563 * move to the next dma window 564 */ 565 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 566 567 /* get a ge_slot for this DMA window */ 568 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 569 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 570 vdp->xdf_addr)); 571 return (DDI_FAILURE); 572 } 573 574 vreq->v_dmaw++; 575 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 576 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 577 vreq->v_status = VREQ_GS_ALLOCED; 578 gs->gs_vreq = vreq; 579 list_insert_head(&vreq->v_gs, gs); 580 break; 581 582 default: 583 return (DDI_FAILURE); 584 } 585 586 return (DDI_SUCCESS); 587 } 588 589 static int 590 xdf_cmlb_attach(xdf_t *vdp) 591 { 592 dev_info_t *dip = vdp->xdf_dip; 593 594 return (cmlb_attach(dip, &xdf_lb_ops, 595 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 596 XD_IS_RM(vdp), 597 B_TRUE, 598 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 599 #if defined(XPV_HVM_DRIVER) 600 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) | 601 CMLB_INTERNAL_MINOR_NODES, 602 #else /* !XPV_HVM_DRIVER */ 603 XD_IS_CD(vdp) ? 
0 : CMLB_FAKE_LABEL_ONE_PARTITION, 604 #endif /* !XPV_HVM_DRIVER */ 605 vdp->xdf_vd_lbl, NULL)); 606 } 607 608 static void 609 xdf_io_err(buf_t *bp, int err, size_t resid) 610 { 611 bioerror(bp, err); 612 if (resid == 0) 613 bp->b_resid = bp->b_bcount; 614 biodone(bp); 615 } 616 617 static void 618 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 619 { 620 v_req_t *vreq = BP_VREQ(bp); 621 622 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 623 624 if (vdp->xdf_xdev_iostat == NULL) 625 return; 626 if ((vreq != NULL) && vreq->v_runq) { 627 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 628 } else { 629 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 630 } 631 } 632 633 static void 634 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 635 { 636 v_req_t *vreq = BP_VREQ(bp); 637 638 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 639 640 if (vdp->xdf_xdev_iostat == NULL) 641 return; 642 if ((vreq != NULL) && vreq->v_runq) { 643 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 644 } else { 645 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 646 } 647 } 648 649 static void 650 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 651 { 652 v_req_t *vreq = BP_VREQ(bp); 653 654 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 655 ASSERT(!vreq->v_runq); 656 657 vreq->v_runq = B_TRUE; 658 if (vdp->xdf_xdev_iostat == NULL) 659 return; 660 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 661 } 662 663 static void 664 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 665 { 666 v_req_t *vreq = BP_VREQ(bp); 667 668 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 669 ASSERT(vreq->v_runq); 670 671 vreq->v_runq = B_FALSE; 672 if (vdp->xdf_xdev_iostat == NULL) 673 return; 674 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 675 } 676 677 int 678 xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance) 679 { 680 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 681 kstat_t *kstat; 682 buf_t *bp; 683 684 if ((kstat = kstat_create( 685 ks_module, instance, NULL, "disk", 686 KSTAT_TYPE_IO, 1, 
KSTAT_FLAG_PERSISTENT)) == NULL) 687 return (-1); 688 689 /* See comment about locking in xdf_kstat_delete(). */ 690 mutex_enter(&vdp->xdf_iostat_lk); 691 mutex_enter(&vdp->xdf_dev_lk); 692 693 /* only one kstat can exist at a time */ 694 if (vdp->xdf_xdev_iostat != NULL) { 695 mutex_exit(&vdp->xdf_dev_lk); 696 mutex_exit(&vdp->xdf_iostat_lk); 697 kstat_delete(kstat); 698 return (-1); 699 } 700 701 vdp->xdf_xdev_iostat = kstat; 702 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 703 kstat_install(vdp->xdf_xdev_iostat); 704 705 /* 706 * Now that we've created a kstat, we need to update the waitq and 707 * runq counts for the kstat to reflect our current state. 708 * 709 * For a buf_t structure to be on the runq, it must have a ring 710 * buffer slot associated with it. To get a ring buffer slot the 711 * buf must first have a v_req_t and a ge_slot_t associated with it. 712 * Then when it is granted a ring buffer slot, v_runq will be set to 713 * true. 714 * 715 * For a buf_t structure to be on the waitq, it must not be on the 716 * runq. So to find all the buf_t's that should be on waitq, we 717 * walk the active buf list and add any buf_t's which aren't on the 718 * runq to the waitq. 719 */ 720 bp = vdp->xdf_f_act; 721 while (bp != NULL) { 722 xdf_kstat_enter(vdp, bp); 723 bp = bp->av_forw; 724 } 725 if (vdp->xdf_ready_tq_bp != NULL) 726 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 727 728 mutex_exit(&vdp->xdf_dev_lk); 729 mutex_exit(&vdp->xdf_iostat_lk); 730 return (0); 731 } 732 733 void 734 xdf_kstat_delete(dev_info_t *dip) 735 { 736 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 737 kstat_t *kstat; 738 buf_t *bp; 739 740 /* 741 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 742 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 743 * and the contents of the our kstat. xdf_iostat_lk is used 744 * to protect the allocation and freeing of the actual kstat. 
745 * xdf_dev_lk can't be used for this purpose because kstat 746 * readers use it to access the contents of the kstat and 747 * hence it can't be held when calling kstat_delete(). 748 */ 749 mutex_enter(&vdp->xdf_iostat_lk); 750 mutex_enter(&vdp->xdf_dev_lk); 751 752 if (vdp->xdf_xdev_iostat == NULL) { 753 mutex_exit(&vdp->xdf_dev_lk); 754 mutex_exit(&vdp->xdf_iostat_lk); 755 return; 756 } 757 758 /* 759 * We're about to destroy the kstat structures, so it isn't really 760 * necessary to update the runq and waitq counts. But, since this 761 * isn't a hot code path we can afford to be a little pedantic and 762 * go ahead and decrement the runq and waitq kstat counters to zero 763 * before free'ing them. This helps us ensure that we've gotten all 764 * our accounting correct. 765 * 766 * For an explanation of how we determine which buffers go on the 767 * runq vs which go on the waitq, see the comments in 768 * xdf_kstat_create(). 769 */ 770 bp = vdp->xdf_f_act; 771 while (bp != NULL) { 772 xdf_kstat_exit(vdp, bp); 773 bp = bp->av_forw; 774 } 775 if (vdp->xdf_ready_tq_bp != NULL) 776 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp); 777 778 kstat = vdp->xdf_xdev_iostat; 779 vdp->xdf_xdev_iostat = NULL; 780 mutex_exit(&vdp->xdf_dev_lk); 781 kstat_delete(kstat); 782 mutex_exit(&vdp->xdf_iostat_lk); 783 } 784 785 /* 786 * Add an IO requests onto the active queue. 787 * 788 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs 789 * are used to establish a connection to the backend, so they recieve 790 * priority over all other IOs. Since xdf_ready_tq_thread only does 791 * synchronous IO, there can only be one xdf_ready_tq_thread request at any 792 * given time and we record the buf associated with that request in 793 * xdf_ready_tq_bp. 
794 */ 795 static void 796 xdf_bp_push(xdf_t *vdp, buf_t *bp) 797 { 798 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 799 ASSERT(bp->av_forw == NULL); 800 801 xdf_kstat_enter(vdp, bp); 802 803 if (curthread == vdp->xdf_ready_tq_thread) { 804 /* new IO requests from the ready thread */ 805 ASSERT(vdp->xdf_ready_tq_bp == NULL); 806 vdp->xdf_ready_tq_bp = bp; 807 return; 808 } 809 810 /* this is normal IO request */ 811 ASSERT(bp != vdp->xdf_ready_tq_bp); 812 813 if (vdp->xdf_f_act == NULL) { 814 /* this is only only IO on the active queue */ 815 ASSERT(vdp->xdf_l_act == NULL); 816 ASSERT(vdp->xdf_i_act == NULL); 817 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; 818 return; 819 } 820 821 /* add this IO to the tail of the active queue */ 822 vdp->xdf_l_act->av_forw = bp; 823 vdp->xdf_l_act = bp; 824 if (vdp->xdf_i_act == NULL) 825 vdp->xdf_i_act = bp; 826 } 827 828 static void 829 xdf_bp_pop(xdf_t *vdp, buf_t *bp) 830 { 831 buf_t *bp_iter; 832 833 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 834 ASSERT(VREQ_DONE(BP_VREQ(bp))); 835 836 if (vdp->xdf_ready_tq_bp == bp) { 837 /* we're done with a ready thread IO request */ 838 ASSERT(bp->av_forw == NULL); 839 vdp->xdf_ready_tq_bp = NULL; 840 return; 841 } 842 843 /* we're done with a normal IO request */ 844 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); 845 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act)); 846 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); 847 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); 848 849 if (bp == vdp->xdf_f_act) { 850 /* This IO was at the head of our active queue. */ 851 vdp->xdf_f_act = bp->av_forw; 852 if (bp == vdp->xdf_l_act) 853 vdp->xdf_l_act = NULL; 854 } else { 855 /* There IO finished before some other pending IOs. 
*/ 856 bp_iter = vdp->xdf_f_act; 857 while (bp != bp_iter->av_forw) { 858 bp_iter = bp_iter->av_forw; 859 ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); 860 ASSERT(bp_iter != vdp->xdf_i_act); 861 } 862 bp_iter->av_forw = bp->av_forw; 863 if (bp == vdp->xdf_l_act) 864 vdp->xdf_l_act = bp_iter; 865 } 866 bp->av_forw = NULL; 867 } 868 869 static buf_t * 870 xdf_bp_next(xdf_t *vdp) 871 { 872 v_req_t *vreq; 873 buf_t *bp; 874 875 if (vdp->xdf_state == XD_CONNECTED) { 876 /* 877 * If we're in the XD_CONNECTED state, we only service IOs 878 * from the xdf_ready_tq_thread thread. 879 */ 880 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 881 return (NULL); 882 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 883 return (bp); 884 return (NULL); 885 } 886 887 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 888 if (vdp->xdf_state != XD_READY) 889 return (NULL); 890 891 ASSERT(vdp->xdf_ready_tq_bp == NULL); 892 for (;;) { 893 if ((bp = vdp->xdf_i_act) == NULL) 894 return (NULL); 895 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 896 return (bp); 897 898 /* advance the active buf index pointer */ 899 vdp->xdf_i_act = bp->av_forw; 900 } 901 } 902 903 static void 904 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 905 { 906 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 907 v_req_t *vreq = gs->gs_vreq; 908 buf_t *bp = vreq->v_buf; 909 910 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 911 ASSERT(BP_VREQ(bp) == vreq); 912 913 gs_free(gs); 914 915 if (bioerr != 0) 916 bioerror(bp, bioerr); 917 ASSERT(vreq->v_nslots > 0); 918 if (--vreq->v_nslots > 0) 919 return; 920 921 /* remove this IO from our active queue */ 922 xdf_bp_pop(vdp, bp); 923 924 ASSERT(vreq->v_runq); 925 xdf_kstat_exit(vdp, bp); 926 vreq->v_runq = B_FALSE; 927 vreq_free(vdp, vreq); 928 929 if (IS_ERROR(bp)) { 930 xdf_io_err(bp, geterror(bp), 0); 931 } else if (bp->b_resid != 0) { 932 /* Partial transfers are an error */ 933 xdf_io_err(bp, EIO, bp->b_resid); 934 } else { 935 biodone(bp); 936 } 937 } 
938 939 /* 940 * xdf interrupt handler 941 */ 942 static uint_t 943 xdf_intr_locked(xdf_t *vdp) 944 { 945 xendev_ring_t *xbr; 946 blkif_response_t *resp; 947 int bioerr; 948 uint64_t id; 949 uint8_t op; 950 uint16_t status; 951 ddi_acc_handle_t acchdl; 952 953 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 954 955 if ((xbr = vdp->xdf_xb_ring) == NULL) 956 return (DDI_INTR_UNCLAIMED); 957 958 acchdl = vdp->xdf_xb_ring_hdl; 959 960 /* 961 * complete all requests which have a response 962 */ 963 while (resp = xvdi_ring_get_response(xbr)) { 964 id = ddi_get64(acchdl, &resp->id); 965 op = ddi_get8(acchdl, &resp->operation); 966 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 967 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 968 op, id, status)); 969 970 if (status != BLKIF_RSP_OKAY) { 971 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 972 vdp->xdf_addr, 973 (op == BLKIF_OP_READ) ? "reading" : "writing")); 974 bioerr = EIO; 975 } else { 976 bioerr = 0; 977 } 978 979 xdf_io_fini(vdp, id, bioerr); 980 } 981 return (DDI_INTR_CLAIMED); 982 } 983 984 /* 985 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 986 * block at a lower pil. 
987 */ 988 static uint_t 989 xdf_intr(caddr_t arg) 990 { 991 xdf_t *vdp = (xdf_t *)arg; 992 int rv; 993 994 mutex_enter(&vdp->xdf_dev_lk); 995 rv = xdf_intr_locked(vdp); 996 mutex_exit(&vdp->xdf_dev_lk); 997 998 if (!do_polled_io) 999 xdf_io_start(vdp); 1000 1001 return (rv); 1002 } 1003 1004 static void 1005 xdf_ring_push(xdf_t *vdp) 1006 { 1007 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1008 1009 if (vdp->xdf_xb_ring == NULL) 1010 return; 1011 1012 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1013 DPRINTF(IO_DBG, ( 1014 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1015 vdp->xdf_addr)); 1016 } 1017 1018 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1019 xvdi_notify_oe(vdp->xdf_dip); 1020 } 1021 1022 static int 1023 xdf_ring_drain_locked(xdf_t *vdp) 1024 { 1025 int pollc, rv = 0; 1026 1027 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1028 1029 if (xdf_debug & SUSRES_DBG) 1030 xen_printf("xdf_ring_drain: start\n"); 1031 1032 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1033 if (vdp->xdf_xb_ring == NULL) 1034 goto out; 1035 1036 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1037 (void) xdf_intr_locked(vdp); 1038 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1039 goto out; 1040 xdf_ring_push(vdp); 1041 1042 /* file-backed devices can be slow */ 1043 mutex_exit(&vdp->xdf_dev_lk); 1044 #ifdef XPV_HVM_DRIVER 1045 (void) HYPERVISOR_yield(); 1046 #endif /* XPV_HVM_DRIVER */ 1047 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1048 mutex_enter(&vdp->xdf_dev_lk); 1049 } 1050 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1051 1052 out: 1053 if (vdp->xdf_xb_ring != NULL) { 1054 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1055 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1056 rv = EIO; 1057 } 1058 if (xdf_debug & SUSRES_DBG) 1059 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1060 vdp->xdf_addr, rv); 1061 return (rv); 1062 } 1063 1064 static int 1065 xdf_ring_drain(xdf_t *vdp) 1066 { 1067 int rv; 
1068 mutex_enter(&vdp->xdf_dev_lk); 1069 rv = xdf_ring_drain_locked(vdp); 1070 mutex_exit(&vdp->xdf_dev_lk); 1071 return (rv); 1072 } 1073 1074 /* 1075 * Destroy all v_req_t, grant table entries, and our ring buffer. 1076 */ 1077 static void 1078 xdf_ring_destroy(xdf_t *vdp) 1079 { 1080 v_req_t *vreq; 1081 buf_t *bp; 1082 ge_slot_t *gs; 1083 1084 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1085 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1086 1087 if ((vdp->xdf_state != XD_INIT) && 1088 (vdp->xdf_state != XD_CONNECTED) && 1089 (vdp->xdf_state != XD_READY)) { 1090 ASSERT(vdp->xdf_xb_ring == NULL); 1091 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1092 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1093 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1094 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1095 return; 1096 } 1097 1098 /* 1099 * We don't want to recieve async notifications from the backend 1100 * when it finishes processing ring entries. 1101 */ 1102 #ifdef XPV_HVM_DRIVER 1103 ec_unbind_evtchn(vdp->xdf_evtchn); 1104 #else /* !XPV_HVM_DRIVER */ 1105 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1106 #endif /* !XPV_HVM_DRIVER */ 1107 1108 /* 1109 * Drain any requests in the ring. We need to do this before we 1110 * can free grant table entries, because if active ring entries 1111 * point to grants, then the backend could be trying to access 1112 * those grants. 1113 */ 1114 (void) xdf_ring_drain_locked(vdp); 1115 1116 /* We're done talking to the backend so free up our event channel */ 1117 xvdi_free_evtchn(vdp->xdf_dip); 1118 vdp->xdf_evtchn = INVALID_EVTCHN; 1119 1120 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1121 bp = vreq->v_buf; 1122 ASSERT(BP_VREQ(bp) == vreq); 1123 1124 /* Free up any grant table entries associaed with this IO */ 1125 while ((gs = list_head(&vreq->v_gs)) != NULL) 1126 gs_free(gs); 1127 1128 /* If this IO was on the runq, move it back to the waitq. 
*/ 1129 if (vreq->v_runq) 1130 xdf_kstat_runq_to_waitq(vdp, bp); 1131 1132 /* 1133 * Reset any buf IO state since we're going to re-issue the 1134 * IO when we reconnect. 1135 */ 1136 vreq_free(vdp, vreq); 1137 BP_VREQ_SET(bp, NULL); 1138 bioerror(bp, 0); 1139 } 1140 1141 /* reset the active queue index pointer */ 1142 vdp->xdf_i_act = vdp->xdf_f_act; 1143 1144 /* Destroy the ring */ 1145 xvdi_free_ring(vdp->xdf_xb_ring); 1146 vdp->xdf_xb_ring = NULL; 1147 vdp->xdf_xb_ring_hdl = NULL; 1148 vdp->xdf_peer = INVALID_DOMID; 1149 } 1150 1151 void 1152 xdfmin(struct buf *bp) 1153 { 1154 if (bp->b_bcount > xdf_maxphys) 1155 bp->b_bcount = xdf_maxphys; 1156 } 1157 1158 /* 1159 * Check if we have a pending "eject" media request. 1160 */ 1161 static int 1162 xdf_eject_pending(xdf_t *vdp) 1163 { 1164 dev_info_t *dip = vdp->xdf_dip; 1165 char *xsname, *str; 1166 1167 if (!vdp->xdf_media_req_supported) 1168 return (B_FALSE); 1169 1170 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1171 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1172 return (B_FALSE); 1173 1174 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1175 strfree(str); 1176 return (B_FALSE); 1177 } 1178 strfree(str); 1179 return (B_TRUE); 1180 } 1181 1182 /* 1183 * Generate a media request. 1184 */ 1185 static int 1186 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1187 { 1188 dev_info_t *dip = vdp->xdf_dip; 1189 char *xsname; 1190 1191 /* 1192 * we can't be holding xdf_dev_lk because xenbus_printf() can 1193 * block while waiting for a PIL 1 interrupt message. this 1194 * would cause a deadlock with xdf_intr() which needs to grab 1195 * xdf_dev_lk as well and runs at PIL 5. 
1196 */ 1197 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1198 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1199 1200 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1201 return (ENXIO); 1202 1203 /* Check if we support media requests */ 1204 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1205 return (ENOTTY); 1206 1207 /* If an eject is pending then don't allow any new requests */ 1208 if (xdf_eject_pending(vdp)) 1209 return (ENXIO); 1210 1211 /* Make sure that there is media present */ 1212 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1213 return (ENXIO); 1214 1215 /* We only allow operations when the device is ready and connected */ 1216 if (vdp->xdf_state != XD_READY) 1217 return (EIO); 1218 1219 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1220 return (EIO); 1221 1222 return (0); 1223 } 1224 1225 /* 1226 * populate a single blkif_request_t w/ a buf 1227 */ 1228 static void 1229 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1230 { 1231 grant_ref_t gr; 1232 uint8_t fsect, lsect; 1233 size_t bcnt; 1234 paddr_t dma_addr; 1235 off_t blk_off; 1236 dev_info_t *dip = vdp->xdf_dip; 1237 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1238 v_req_t *vreq = BP_VREQ(bp); 1239 uint64_t blkno = vreq->v_blkno; 1240 uint_t ndmacs = vreq->v_ndmacs; 1241 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1242 int seg = 0; 1243 int isread = IS_READ(bp); 1244 ge_slot_t *gs = list_head(&vreq->v_gs); 1245 1246 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1247 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1248 1249 if (isread) 1250 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1251 else { 1252 switch (vreq->v_flush_diskcache) { 1253 case FLUSH_DISKCACHE: 1254 ddi_put8(acchdl, &rreq->operation, 1255 BLKIF_OP_FLUSH_DISKCACHE); 1256 ddi_put16(acchdl, &rreq->handle, vdev); 1257 ddi_put64(acchdl, &rreq->id, 1258 (uint64_t)(uintptr_t)(gs)); 1259 ddi_put8(acchdl, &rreq->nr_segments, 0); 1260 vreq->v_status = VREQ_DMAWIN_DONE; 1261 return; 1262 case 
WRITE_BARRIER: 1263 ddi_put8(acchdl, &rreq->operation, 1264 BLKIF_OP_WRITE_BARRIER); 1265 break; 1266 default: 1267 if (!vdp->xdf_wce) 1268 ddi_put8(acchdl, &rreq->operation, 1269 BLKIF_OP_WRITE_BARRIER); 1270 else 1271 ddi_put8(acchdl, &rreq->operation, 1272 BLKIF_OP_WRITE); 1273 break; 1274 } 1275 } 1276 1277 ddi_put16(acchdl, &rreq->handle, vdev); 1278 ddi_put64(acchdl, &rreq->sector_number, blkno); 1279 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1280 1281 /* 1282 * loop until all segments are populated or no more dma cookie in buf 1283 */ 1284 for (;;) { 1285 /* 1286 * Each segment of a blkif request can transfer up to 1287 * one 4K page of data. 1288 */ 1289 bcnt = vreq->v_dmac.dmac_size; 1290 dma_addr = vreq->v_dmac.dmac_laddress; 1291 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1292 fsect = blk_off >> XB_BSHIFT; 1293 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1294 1295 ASSERT(bcnt <= PAGESIZE); 1296 ASSERT((bcnt % XB_BSIZE) == 0); 1297 ASSERT((blk_off & XB_BMASK) == 0); 1298 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1299 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1300 1301 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1302 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1303 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1304 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1305 1306 DPRINTF(IO_DBG, ( 1307 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1308 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1309 DPRINTF(IO_DBG, ( 1310 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1311 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1312 1313 blkno += (bcnt >> XB_BSHIFT); 1314 seg++; 1315 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1316 if (--ndmacs) { 1317 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1318 continue; 1319 } 1320 1321 vreq->v_status = VREQ_DMAWIN_DONE; 1322 vreq->v_blkno = blkno; 1323 break; 1324 } 1325 ddi_put8(acchdl, &rreq->nr_segments, seg); 1326 DPRINTF(IO_DBG, ( 1327 "xdf@%s: xdf_process_rreq: 
request id=%"PRIx64" ready\n", 1328 vdp->xdf_addr, rreq->id)); 1329 } 1330 1331 static void 1332 xdf_io_start(xdf_t *vdp) 1333 { 1334 struct buf *bp; 1335 v_req_t *vreq; 1336 blkif_request_t *rreq; 1337 boolean_t rreqready = B_FALSE; 1338 1339 mutex_enter(&vdp->xdf_dev_lk); 1340 1341 /* 1342 * Populate the ring request(s). Loop until there is no buf to 1343 * transfer or no free slot available in I/O ring. 1344 */ 1345 for (;;) { 1346 /* don't start any new IO if we're suspending */ 1347 if (vdp->xdf_suspending) 1348 break; 1349 if ((bp = xdf_bp_next(vdp)) == NULL) 1350 break; 1351 1352 /* if the buf doesn't already have a vreq, allocate one */ 1353 if (((vreq = BP_VREQ(bp)) == NULL) && 1354 ((vreq = vreq_get(vdp, bp)) == NULL)) 1355 break; 1356 1357 /* alloc DMA/GTE resources */ 1358 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1359 break; 1360 1361 /* get next blkif_request in the ring */ 1362 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1363 break; 1364 bzero(rreq, sizeof (blkif_request_t)); 1365 rreqready = B_TRUE; 1366 1367 /* populate blkif_request with this buf */ 1368 xdf_process_rreq(vdp, bp, rreq); 1369 1370 /* 1371 * This buffer/vreq pair is has been allocated a ring buffer 1372 * resources, so if it isn't already in our runq, add it. 
1373 */ 1374 if (!vreq->v_runq) 1375 xdf_kstat_waitq_to_runq(vdp, bp); 1376 } 1377 1378 /* Send the request(s) to the backend */ 1379 if (rreqready) 1380 xdf_ring_push(vdp); 1381 1382 mutex_exit(&vdp->xdf_dev_lk); 1383 } 1384 1385 1386 /* check if partition is open, -1 - check all partitions on the disk */ 1387 static boolean_t 1388 xdf_isopen(xdf_t *vdp, int partition) 1389 { 1390 int i; 1391 ulong_t parbit; 1392 boolean_t rval = B_FALSE; 1393 1394 ASSERT((partition == -1) || 1395 ((partition >= 0) || (partition < XDF_PEXT))); 1396 1397 if (partition == -1) 1398 parbit = (ulong_t)-1; 1399 else 1400 parbit = 1 << partition; 1401 1402 for (i = 0; i < OTYPCNT; i++) { 1403 if (vdp->xdf_vd_open[i] & parbit) 1404 rval = B_TRUE; 1405 } 1406 1407 return (rval); 1408 } 1409 1410 /* 1411 * The connection should never be closed as long as someone is holding 1412 * us open, there is pending IO, or someone is waiting waiting for a 1413 * connection. 1414 */ 1415 static boolean_t 1416 xdf_busy(xdf_t *vdp) 1417 { 1418 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1419 1420 if ((vdp->xdf_xb_ring != NULL) && 1421 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1422 ASSERT(vdp->xdf_state != XD_CLOSED); 1423 return (B_TRUE); 1424 } 1425 1426 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1427 ASSERT(vdp->xdf_state != XD_CLOSED); 1428 return (B_TRUE); 1429 } 1430 1431 if (xdf_isopen(vdp, -1)) { 1432 ASSERT(vdp->xdf_state != XD_CLOSED); 1433 return (B_TRUE); 1434 } 1435 1436 if (vdp->xdf_connect_req > 0) { 1437 ASSERT(vdp->xdf_state != XD_CLOSED); 1438 return (B_TRUE); 1439 } 1440 1441 return (B_FALSE); 1442 } 1443 1444 static void 1445 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1446 { 1447 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1448 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1449 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1450 vdp->xdf_addr, vdp->xdf_state, new_state)); 1451 vdp->xdf_state = new_state; 1452 cv_broadcast(&vdp->xdf_dev_cv); 1453 } 1454 
1455 static void 1456 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1457 { 1458 dev_info_t *dip = vdp->xdf_dip; 1459 boolean_t busy; 1460 1461 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1462 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1463 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1464 1465 /* Check if we're already there. */ 1466 if (vdp->xdf_state == new_state) 1467 return; 1468 1469 mutex_enter(&vdp->xdf_dev_lk); 1470 busy = xdf_busy(vdp); 1471 1472 /* If we're already closed then there's nothing todo. */ 1473 if (vdp->xdf_state == XD_CLOSED) { 1474 ASSERT(!busy); 1475 xdf_set_state(vdp, new_state); 1476 mutex_exit(&vdp->xdf_dev_lk); 1477 return; 1478 } 1479 1480 #ifdef DEBUG 1481 /* UhOh. Warn the user that something bad has happened. */ 1482 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1483 (vdp->xdf_xdev_nblocks != 0)) { 1484 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1485 vdp->xdf_addr); 1486 } 1487 #endif /* DEBUG */ 1488 1489 xdf_ring_destroy(vdp); 1490 1491 /* If we're busy then we can only go into the unknown state */ 1492 xdf_set_state(vdp, (busy) ? 
XD_UNKNOWN : new_state); 1493 mutex_exit(&vdp->xdf_dev_lk); 1494 1495 /* if we're closed now, let the other end know */ 1496 if (vdp->xdf_state == XD_CLOSED) 1497 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1498 } 1499 1500 1501 /* 1502 * Kick-off connect process 1503 * Status should be XD_UNKNOWN or XD_CLOSED 1504 * On success, status will be changed to XD_INIT 1505 * On error, it will be changed to XD_UNKNOWN 1506 */ 1507 static int 1508 xdf_setstate_init(xdf_t *vdp) 1509 { 1510 dev_info_t *dip = vdp->xdf_dip; 1511 xenbus_transaction_t xbt; 1512 grant_ref_t gref; 1513 char *xsname, *str; 1514 int rv; 1515 1516 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1517 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1518 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1519 (vdp->xdf_state == XD_CLOSED)); 1520 1521 DPRINTF(DDI_DBG, 1522 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1523 1524 /* 1525 * If an eject is pending then don't allow a new connection. 1526 * (Only the backend can clear media request eject request.) 1527 */ 1528 if (xdf_eject_pending(vdp)) 1529 return (DDI_FAILURE); 1530 1531 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1532 goto errout; 1533 1534 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1535 goto errout; 1536 1537 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1538 1539 /* 1540 * Sanity check for the existance of the xenbus device-type property. 1541 * This property might not exist if we our xenbus device nodes was 1542 * force destroyed while we were still connected to the backend. 
1543 */ 1544 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1545 goto errout; 1546 strfree(str); 1547 1548 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1549 goto errout; 1550 1551 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1552 #ifdef XPV_HVM_DRIVER 1553 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1554 #else /* !XPV_HVM_DRIVER */ 1555 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1556 DDI_SUCCESS) { 1557 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1558 "failed to add intr handler", vdp->xdf_addr); 1559 goto errout1; 1560 } 1561 #endif /* !XPV_HVM_DRIVER */ 1562 1563 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1564 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1565 DDI_SUCCESS) { 1566 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1567 vdp->xdf_addr); 1568 goto errout2; 1569 } 1570 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1571 1572 /* 1573 * Write into xenstore the info needed by backend 1574 */ 1575 trans_retry: 1576 if (xenbus_transaction_start(&xbt)) { 1577 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1578 vdp->xdf_addr); 1579 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1580 goto fail_trans; 1581 } 1582 1583 /* 1584 * XBP_PROTOCOL is written by the domain builder in the case of PV 1585 * domains. However, it is not written for HVM domains, so let's 1586 * write it here. 
1587 */ 1588 if (((rv = xenbus_printf(xbt, xsname, 1589 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1590 ((rv = xenbus_printf(xbt, xsname, 1591 XBP_RING_REF, "%u", gref)) != 0) || 1592 ((rv = xenbus_printf(xbt, xsname, 1593 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1594 ((rv = xenbus_printf(xbt, xsname, 1595 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1596 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1597 (void) xenbus_transaction_end(xbt, 1); 1598 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1599 goto fail_trans; 1600 } 1601 1602 /* kick-off connect process */ 1603 if (rv = xenbus_transaction_end(xbt, 0)) { 1604 if (rv == EAGAIN) 1605 goto trans_retry; 1606 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1607 goto fail_trans; 1608 } 1609 1610 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1611 mutex_enter(&vdp->xdf_dev_lk); 1612 xdf_set_state(vdp, XD_INIT); 1613 mutex_exit(&vdp->xdf_dev_lk); 1614 1615 return (DDI_SUCCESS); 1616 1617 fail_trans: 1618 xvdi_free_ring(vdp->xdf_xb_ring); 1619 errout2: 1620 #ifdef XPV_HVM_DRIVER 1621 ec_unbind_evtchn(vdp->xdf_evtchn); 1622 #else /* !XPV_HVM_DRIVER */ 1623 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1624 #endif /* !XPV_HVM_DRIVER */ 1625 errout1: 1626 xvdi_free_evtchn(dip); 1627 vdp->xdf_evtchn = INVALID_EVTCHN; 1628 errout: 1629 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1630 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1631 vdp->xdf_addr); 1632 return (DDI_FAILURE); 1633 } 1634 1635 int 1636 xdf_get_flush_block(xdf_t *vdp) 1637 { 1638 /* 1639 * Get a DEV_BSIZE aligned bufer 1640 */ 1641 vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); 1642 vdp->xdf_cache_flush_block = 1643 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); 1644 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1645 xdf_flush_block, DEV_BSIZE, NULL) != 0) 1646 return (DDI_FAILURE); 1647 return (DDI_SUCCESS); 1648 } 1649 1650 static void 
1651 xdf_setstate_ready(void *arg) 1652 { 1653 xdf_t *vdp = (xdf_t *)arg; 1654 1655 vdp->xdf_ready_tq_thread = curthread; 1656 1657 /* 1658 * We've created all the minor nodes via cmlb_attach() using default 1659 * value in xdf_attach() to make it possible to block in xdf_open(), 1660 * in case there's anyone (say, booting thread) ever trying to open 1661 * it before connected to backend. We will refresh all those minor 1662 * nodes w/ latest info we've got now when we are almost connected. 1663 */ 1664 mutex_enter(&vdp->xdf_dev_lk); 1665 if (vdp->xdf_cmbl_reattach) { 1666 vdp->xdf_cmbl_reattach = B_FALSE; 1667 1668 mutex_exit(&vdp->xdf_dev_lk); 1669 if (xdf_cmlb_attach(vdp) != 0) { 1670 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1671 return; 1672 } 1673 mutex_enter(&vdp->xdf_dev_lk); 1674 } 1675 1676 /* If we're not still trying to get to the ready state, then bail. */ 1677 if (vdp->xdf_state != XD_CONNECTED) { 1678 mutex_exit(&vdp->xdf_dev_lk); 1679 return; 1680 } 1681 mutex_exit(&vdp->xdf_dev_lk); 1682 1683 /* 1684 * If backend has feature-barrier, see if it supports disk 1685 * cache flush op. 1686 */ 1687 vdp->xdf_flush_supported = B_FALSE; 1688 if (vdp->xdf_feature_barrier) { 1689 /* 1690 * Pretend we already know flush is supported so probe 1691 * will attempt the correct op. 1692 */ 1693 vdp->xdf_flush_supported = B_TRUE; 1694 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1695 vdp->xdf_flush_supported = B_TRUE; 1696 } else { 1697 vdp->xdf_flush_supported = B_FALSE; 1698 /* 1699 * If the other end does not support the cache flush op 1700 * then we must use a barrier-write to force disk 1701 * cache flushing. Barrier writes require that a data 1702 * block actually be written. 1703 * Cache a block to barrier-write when we are 1704 * asked to perform a flush. 1705 * XXX - would it be better to just copy 1 block 1706 * (512 bytes) from whatever write we did last 1707 * and rewrite that block? 
1708 */ 1709 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1710 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1711 return; 1712 } 1713 } 1714 } 1715 1716 mutex_enter(&vdp->xdf_cb_lk); 1717 mutex_enter(&vdp->xdf_dev_lk); 1718 if (vdp->xdf_state == XD_CONNECTED) 1719 xdf_set_state(vdp, XD_READY); 1720 mutex_exit(&vdp->xdf_dev_lk); 1721 1722 /* Restart any currently queued up io */ 1723 xdf_io_start(vdp); 1724 1725 mutex_exit(&vdp->xdf_cb_lk); 1726 } 1727 1728 /* 1729 * synthetic geometry 1730 */ 1731 #define XDF_NSECTS 256 1732 #define XDF_NHEADS 16 1733 1734 static void 1735 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1736 { 1737 xdf_t *vdp; 1738 uint_t ncyl; 1739 1740 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1741 1742 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1743 1744 bzero(geomp, sizeof (*geomp)); 1745 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1746 geomp->g_acyl = 0; 1747 geomp->g_nhead = XDF_NHEADS; 1748 geomp->g_nsect = XDF_NSECTS; 1749 geomp->g_secsize = XB_BSIZE; 1750 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1751 geomp->g_intrlv = 0; 1752 geomp->g_rpm = 7200; 1753 } 1754 1755 /* 1756 * Finish other initialization after we've connected to backend 1757 * Status should be XD_INIT before calling this routine 1758 * On success, status should be changed to XD_CONNECTED. 
1759 * On error, status should stay XD_INIT 1760 */ 1761 static int 1762 xdf_setstate_connected(xdf_t *vdp) 1763 { 1764 dev_info_t *dip = vdp->xdf_dip; 1765 cmlb_geom_t pgeom; 1766 diskaddr_t nblocks = 0; 1767 char *oename, *xsname, *str; 1768 uint_t dinfo; 1769 1770 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1771 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1772 ASSERT(vdp->xdf_state == XD_INIT); 1773 1774 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1775 ((oename = xvdi_get_oename(dip)) == NULL)) 1776 return (DDI_FAILURE); 1777 1778 /* Make sure the other end is XenbusStateConnected */ 1779 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1780 return (DDI_FAILURE); 1781 1782 /* Determine if feature barrier is supported by backend */ 1783 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1784 cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", 1785 vdp->xdf_addr); 1786 1787 /* 1788 * Probe backend. Read the device size into xdf_xdev_nblocks 1789 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1790 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1791 * we always set VDISK_CDROM, regardless of if it's present in 1792 * the xenbus info parameter. 
1793 */ 1794 if (xenbus_gather(XBT_NULL, oename, 1795 XBP_SECTORS, "%"SCNu64, &nblocks, 1796 XBP_INFO, "%u", &dinfo, 1797 NULL) != 0) { 1798 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1799 "cannot read backend info", vdp->xdf_addr); 1800 return (DDI_FAILURE); 1801 } 1802 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1803 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1804 vdp->xdf_addr); 1805 return (DDI_FAILURE); 1806 } 1807 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1808 dinfo |= VDISK_CDROM; 1809 strfree(str); 1810 1811 vdp->xdf_xdev_nblocks = nblocks; 1812 #ifdef _ILP32 1813 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1814 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1815 "backend disk device too large with %llu blocks for" 1816 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1817 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1818 return (DDI_FAILURE); 1819 } 1820 #endif 1821 1822 /* 1823 * If the physical geometry for a fixed disk has been explicity 1824 * set then make sure that the specified physical geometry isn't 1825 * larger than the device we connected to. 
1826 */ 1827 if (vdp->xdf_pgeom_fixed && 1828 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1829 cmn_err(CE_WARN, 1830 "xdf@%s: connect failed, fixed geometry too large", 1831 vdp->xdf_addr); 1832 return (DDI_FAILURE); 1833 } 1834 1835 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1836 1837 /* mark vbd is ready for I/O */ 1838 mutex_enter(&vdp->xdf_dev_lk); 1839 xdf_set_state(vdp, XD_CONNECTED); 1840 1841 /* check if the cmlb label should be updated */ 1842 xdf_synthetic_pgeom(dip, &pgeom); 1843 if ((vdp->xdf_dinfo != dinfo) || 1844 (!vdp->xdf_pgeom_fixed && 1845 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1846 vdp->xdf_cmbl_reattach = B_TRUE; 1847 1848 vdp->xdf_dinfo = dinfo; 1849 if (!vdp->xdf_pgeom_fixed) 1850 vdp->xdf_pgeom = pgeom; 1851 } 1852 1853 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1854 if (vdp->xdf_xdev_nblocks == 0) { 1855 vdp->xdf_mstate = DKIO_EJECTED; 1856 cv_broadcast(&vdp->xdf_mstate_cv); 1857 } else { 1858 vdp->xdf_mstate = DKIO_INSERTED; 1859 cv_broadcast(&vdp->xdf_mstate_cv); 1860 } 1861 } else { 1862 if (vdp->xdf_mstate != DKIO_NONE) { 1863 vdp->xdf_mstate = DKIO_NONE; 1864 cv_broadcast(&vdp->xdf_mstate_cv); 1865 } 1866 } 1867 1868 mutex_exit(&vdp->xdf_dev_lk); 1869 1870 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1871 (uint64_t)vdp->xdf_xdev_nblocks); 1872 1873 /* Restart any currently queued up io */ 1874 xdf_io_start(vdp); 1875 1876 /* 1877 * To get to the ready state we have to do IO to the backend device, 1878 * but we can't initiate IO from the other end change callback thread 1879 * (which is the current context we're executing in.) This is because 1880 * if the other end disconnects while we're doing IO from the callback 1881 * thread, then we can't recieve that disconnect event and we hang 1882 * waiting for an IO that can never complete. 
1883 */ 1884 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1885 DDI_SLEEP); 1886 1887 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1888 return (DDI_SUCCESS); 1889 } 1890 1891 /*ARGSUSED*/ 1892 static void 1893 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1894 { 1895 XenbusState new_state = *(XenbusState *)impl_data; 1896 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1897 1898 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1899 vdp->xdf_addr, new_state)); 1900 1901 mutex_enter(&vdp->xdf_cb_lk); 1902 1903 /* We assume that this callback is single threaded */ 1904 ASSERT(vdp->xdf_oe_change_thread == NULL); 1905 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1906 1907 /* ignore any backend state changes if we're suspending/suspended */ 1908 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1909 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1910 mutex_exit(&vdp->xdf_cb_lk); 1911 return; 1912 } 1913 1914 switch (new_state) { 1915 case XenbusStateUnknown: 1916 case XenbusStateInitialising: 1917 case XenbusStateInitWait: 1918 case XenbusStateInitialised: 1919 if (vdp->xdf_state == XD_INIT) 1920 break; 1921 1922 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1923 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1924 break; 1925 ASSERT(vdp->xdf_state == XD_INIT); 1926 break; 1927 1928 case XenbusStateConnected: 1929 if ((vdp->xdf_state == XD_CONNECTED) || 1930 (vdp->xdf_state == XD_READY)) 1931 break; 1932 1933 if (vdp->xdf_state != XD_INIT) { 1934 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1935 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1936 break; 1937 ASSERT(vdp->xdf_state == XD_INIT); 1938 } 1939 1940 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1941 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1942 break; 1943 } 1944 ASSERT(vdp->xdf_state == XD_CONNECTED); 1945 break; 1946 1947 case XenbusStateClosing: 1948 if (xdf_isopen(vdp, -1)) { 1949 cmn_err(CE_NOTE, 1950 "xdf@%s: 
hot-unplug failed, still in use", 1951 vdp->xdf_addr); 1952 break; 1953 } 1954 /*FALLTHROUGH*/ 1955 case XenbusStateClosed: 1956 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1957 break; 1958 } 1959 1960 /* notify anybody waiting for oe state change */ 1961 cv_broadcast(&vdp->xdf_dev_cv); 1962 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1963 mutex_exit(&vdp->xdf_cb_lk); 1964 } 1965 1966 static int 1967 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1968 { 1969 int rv, timeouts = 0, reset = 20; 1970 1971 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1972 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1973 1974 /* we can't connect once we're in the closed state */ 1975 if (vdp->xdf_state == XD_CLOSED) 1976 return (XD_CLOSED); 1977 1978 vdp->xdf_connect_req++; 1979 while (vdp->xdf_state != XD_READY) { 1980 mutex_exit(&vdp->xdf_dev_lk); 1981 1982 /* only one thread at a time can be the connection thread */ 1983 if (vdp->xdf_connect_thread == NULL) 1984 vdp->xdf_connect_thread = curthread; 1985 1986 if (vdp->xdf_connect_thread == curthread) { 1987 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 1988 /* 1989 * If we haven't establised a connection 1990 * within the reset time, then disconnect 1991 * so we can try again, and double the reset 1992 * time. The reset time starts at 2 sec. 
1993 */ 1994 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 1995 reset *= 2; 1996 } 1997 if (vdp->xdf_state == XD_UNKNOWN) 1998 (void) xdf_setstate_init(vdp); 1999 if (vdp->xdf_state == XD_INIT) 2000 (void) xdf_setstate_connected(vdp); 2001 } 2002 2003 mutex_enter(&vdp->xdf_dev_lk); 2004 if (!wait || (vdp->xdf_state == XD_READY)) 2005 goto out; 2006 2007 mutex_exit((&vdp->xdf_cb_lk)); 2008 if (vdp->xdf_connect_thread != curthread) { 2009 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2010 } else { 2011 /* delay for 0.1 sec */ 2012 rv = cv_timedwait_sig(&vdp->xdf_dev_cv, 2013 &vdp->xdf_dev_lk, lbolt + drv_usectohz(100*1000)); 2014 if (rv == -1) 2015 timeouts++; 2016 } 2017 mutex_exit((&vdp->xdf_dev_lk)); 2018 mutex_enter((&vdp->xdf_cb_lk)); 2019 mutex_enter((&vdp->xdf_dev_lk)); 2020 if (rv == 0) 2021 goto out; 2022 } 2023 2024 out: 2025 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2026 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2027 2028 if (vdp->xdf_connect_thread == curthread) { 2029 /* 2030 * wake up someone else so they can become the connection 2031 * thread. 
2032 */ 2033 cv_signal(&vdp->xdf_dev_cv); 2034 vdp->xdf_connect_thread = NULL; 2035 } 2036 2037 /* Try to lock the media */ 2038 mutex_exit((&vdp->xdf_dev_lk)); 2039 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2040 mutex_enter((&vdp->xdf_dev_lk)); 2041 2042 vdp->xdf_connect_req--; 2043 return (vdp->xdf_state); 2044 } 2045 2046 static uint_t 2047 xdf_iorestart(caddr_t arg) 2048 { 2049 xdf_t *vdp = (xdf_t *)arg; 2050 2051 ASSERT(vdp != NULL); 2052 2053 mutex_enter(&vdp->xdf_dev_lk); 2054 ASSERT(ISDMACBON(vdp)); 2055 SETDMACBOFF(vdp); 2056 mutex_exit(&vdp->xdf_dev_lk); 2057 2058 xdf_io_start(vdp); 2059 2060 return (DDI_INTR_CLAIMED); 2061 } 2062 2063 #if defined(XPV_HVM_DRIVER) 2064 2065 typedef struct xdf_hvm_entry { 2066 list_node_t xdf_he_list; 2067 char *xdf_he_path; 2068 dev_info_t *xdf_he_dip; 2069 } xdf_hvm_entry_t; 2070 2071 static list_t xdf_hvm_list; 2072 static kmutex_t xdf_hvm_list_lock; 2073 2074 static xdf_hvm_entry_t * 2075 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2076 { 2077 xdf_hvm_entry_t *i; 2078 2079 ASSERT((path != NULL) || (dip != NULL)); 2080 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2081 2082 i = list_head(&xdf_hvm_list); 2083 while (i != NULL) { 2084 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2085 i = list_next(&xdf_hvm_list, i); 2086 continue; 2087 } 2088 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2089 i = list_next(&xdf_hvm_list, i); 2090 continue; 2091 } 2092 break; 2093 } 2094 return (i); 2095 } 2096 2097 dev_info_t * 2098 xdf_hvm_hold(const char *path) 2099 { 2100 xdf_hvm_entry_t *i; 2101 dev_info_t *dip; 2102 2103 mutex_enter(&xdf_hvm_list_lock); 2104 i = i_xdf_hvm_find(path, NULL); 2105 if (i == NULL) { 2106 mutex_exit(&xdf_hvm_list_lock); 2107 return (B_FALSE); 2108 } 2109 ndi_hold_devi(dip = i->xdf_he_dip); 2110 mutex_exit(&xdf_hvm_list_lock); 2111 return (dip); 2112 } 2113 2114 static void 2115 xdf_hvm_add(dev_info_t *dip) 2116 { 2117 xdf_hvm_entry_t *i; 2118 char *path; 2119 2120 /* figure out 
the path for the dip */ 2121 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2122 (void) ddi_pathname(dip, path); 2123 2124 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2125 i->xdf_he_dip = dip; 2126 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2127 2128 mutex_enter(&xdf_hvm_list_lock); 2129 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2130 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2131 list_insert_head(&xdf_hvm_list, i); 2132 mutex_exit(&xdf_hvm_list_lock); 2133 2134 kmem_free(path, MAXPATHLEN); 2135 } 2136 2137 static void 2138 xdf_hvm_rm(dev_info_t *dip) 2139 { 2140 xdf_hvm_entry_t *i; 2141 2142 mutex_enter(&xdf_hvm_list_lock); 2143 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2144 list_remove(&xdf_hvm_list, i); 2145 mutex_exit(&xdf_hvm_list_lock); 2146 2147 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2148 kmem_free(i, sizeof (*i)); 2149 } 2150 2151 static void 2152 xdf_hvm_init(void) 2153 { 2154 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2155 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2156 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2157 } 2158 2159 static void 2160 xdf_hvm_fini(void) 2161 { 2162 ASSERT(list_head(&xdf_hvm_list) == NULL); 2163 list_destroy(&xdf_hvm_list); 2164 mutex_destroy(&xdf_hvm_list_lock); 2165 } 2166 2167 boolean_t 2168 xdf_hvm_connect(dev_info_t *dip) 2169 { 2170 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2171 char *oename, *str; 2172 int rv; 2173 2174 mutex_enter(&vdp->xdf_cb_lk); 2175 2176 /* 2177 * Before try to establish a connection we need to wait for the 2178 * backend hotplug scripts to have run. Once they are run the 2179 * "<oename>/hotplug-status" property will be set to "connected". 2180 */ 2181 for (;;) { 2182 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2183 2184 /* 2185 * Get the xenbus path to the backend device. Note that 2186 * we can't cache this path (and we look it up on each pass 2187 * through this loop) because it could change during 2188 * suspend, resume, and migration operations. 
2189 */ 2190 if ((oename = xvdi_get_oename(dip)) == NULL) { 2191 mutex_exit(&vdp->xdf_cb_lk); 2192 return (B_FALSE); 2193 } 2194 2195 str = NULL; 2196 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2197 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2198 break; 2199 2200 if (str != NULL) 2201 strfree(str); 2202 2203 /* wait for an update to "<oename>/hotplug-status" */ 2204 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2205 /* we got interrupted by a signal */ 2206 mutex_exit(&vdp->xdf_cb_lk); 2207 return (B_FALSE); 2208 } 2209 } 2210 2211 /* Good news. The backend hotplug scripts have been run. */ 2212 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2213 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2214 strfree(str); 2215 2216 /* 2217 * If we're emulating a cd device and if the backend doesn't support 2218 * media request opreations, then we're not going to bother trying 2219 * to establish a connection for a couple reasons. First off, media 2220 * requests support is required to support operations like eject and 2221 * media locking. Second, other backend platforms like Linux don't 2222 * support hvm pv cdrom access. They don't even have a backend pv 2223 * driver for cdrom device nodes, so we don't want to block forever 2224 * waiting for a connection to a backend driver that doesn't exist. 2225 */ 2226 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2227 mutex_exit(&vdp->xdf_cb_lk); 2228 return (B_FALSE); 2229 } 2230 2231 mutex_enter(&vdp->xdf_dev_lk); 2232 rv = xdf_connect_locked(vdp, B_TRUE); 2233 mutex_exit(&vdp->xdf_dev_lk); 2234 mutex_exit(&vdp->xdf_cb_lk); 2235 2236 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2237 } 2238 2239 int 2240 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2241 { 2242 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2243 2244 /* sanity check the requested physical geometry */ 2245 mutex_enter(&vdp->xdf_dev_lk); 2246 if ((geomp->g_secsize != XB_BSIZE) || 2247 (geomp->g_capacity == 0)) { 2248 mutex_exit(&vdp->xdf_dev_lk); 2249 return (EINVAL); 2250 } 2251 2252 /* 2253 * If we've already connected to the backend device then make sure 2254 * we're not defining a physical geometry larger than our backend 2255 * device. 2256 */ 2257 if ((vdp->xdf_xdev_nblocks != 0) && 2258 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2259 mutex_exit(&vdp->xdf_dev_lk); 2260 return (EINVAL); 2261 } 2262 2263 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2264 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2265 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2266 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2267 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2268 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2269 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2270 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2271 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2272 2273 vdp->xdf_pgeom_fixed = B_TRUE; 2274 mutex_exit(&vdp->xdf_dev_lk); 2275 2276 /* force a re-validation */ 2277 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2278 2279 return (0); 2280 } 2281 2282 boolean_t 2283 xdf_is_cd(dev_info_t *dip) 2284 { 2285 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2286 boolean_t rv; 2287 2288 mutex_enter(&vdp->xdf_cb_lk); 2289 rv = XD_IS_CD(vdp); 2290 mutex_exit(&vdp->xdf_cb_lk); 2291 return (rv); 2292 } 2293 2294 boolean_t 2295 xdf_is_rm(dev_info_t *dip) 2296 { 2297 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2298 boolean_t rv; 2299 2300 mutex_enter(&vdp->xdf_cb_lk); 2301 rv = XD_IS_RM(vdp); 2302 mutex_exit(&vdp->xdf_cb_lk); 2303 return (rv); 2304 } 2305 2306 boolean_t 2307 xdf_media_req_supported(dev_info_t *dip) 2308 { 2309 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 
2310 boolean_t rv; 2311 2312 mutex_enter(&vdp->xdf_cb_lk); 2313 rv = vdp->xdf_media_req_supported; 2314 mutex_exit(&vdp->xdf_cb_lk); 2315 return (rv); 2316 } 2317 2318 #endif /* XPV_HVM_DRIVER */ 2319 2320 static int 2321 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2322 { 2323 xdf_t *vdp; 2324 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2325 2326 if (vdp == NULL) 2327 return (ENXIO); 2328 2329 mutex_enter(&vdp->xdf_dev_lk); 2330 *capp = vdp->xdf_pgeom.g_capacity; 2331 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2332 mutex_exit(&vdp->xdf_dev_lk); 2333 return (0); 2334 } 2335 2336 static int 2337 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2338 { 2339 xdf_t *vdp; 2340 2341 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2342 return (ENXIO); 2343 *geomp = vdp->xdf_pgeom; 2344 return (0); 2345 } 2346 2347 /* 2348 * No real HBA, no geometry available from it 2349 */ 2350 /*ARGSUSED*/ 2351 static int 2352 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2353 { 2354 return (EINVAL); 2355 } 2356 2357 static int 2358 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2359 { 2360 xdf_t *vdp; 2361 2362 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2363 return (ENXIO); 2364 2365 if (XD_IS_RO(vdp)) 2366 tgattributep->media_is_writable = 0; 2367 else 2368 tgattributep->media_is_writable = 1; 2369 return (0); 2370 } 2371 2372 /* ARGSUSED3 */ 2373 int 2374 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2375 { 2376 switch (cmd) { 2377 case TG_GETPHYGEOM: 2378 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2379 case TG_GETVIRTGEOM: 2380 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2381 case TG_GETCAPACITY: 2382 return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2383 case TG_GETBLOCKSIZE: 2384 *(uint32_t *)arg = XB_BSIZE; 2385 return (0); 2386 case TG_GETATTR: 2387 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2388 default: 2389 
return (ENOTTY); 2390 } 2391 } 2392 2393 /* ARGSUSED5 */ 2394 int 2395 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2396 diskaddr_t start, size_t reqlen, void *tg_cookie) 2397 { 2398 xdf_t *vdp; 2399 struct buf *bp; 2400 int err = 0; 2401 2402 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2403 2404 /* We don't allow IO from the oe_change callback thread */ 2405 ASSERT(curthread != vdp->xdf_oe_change_thread); 2406 2407 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2408 return (EINVAL); 2409 2410 bp = getrbuf(KM_SLEEP); 2411 if (cmd == TG_READ) 2412 bp->b_flags = B_BUSY | B_READ; 2413 else 2414 bp->b_flags = B_BUSY | B_WRITE; 2415 bp->b_un.b_addr = bufp; 2416 bp->b_bcount = reqlen; 2417 bp->b_blkno = start; 2418 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2419 2420 mutex_enter(&vdp->xdf_dev_lk); 2421 xdf_bp_push(vdp, bp); 2422 mutex_exit(&vdp->xdf_dev_lk); 2423 xdf_io_start(vdp); 2424 if (curthread == vdp->xdf_ready_tq_thread) 2425 (void) xdf_ring_drain(vdp); 2426 err = biowait(bp); 2427 ASSERT(bp->b_flags & B_DONE); 2428 freerbuf(bp); 2429 return (err); 2430 } 2431 2432 /* 2433 * Lock the current media. Set the media state to "lock". 2434 * (Media locks are only respected by the backend driver.) 2435 */ 2436 static int 2437 xdf_ioctl_mlock(xdf_t *vdp) 2438 { 2439 int rv; 2440 mutex_enter(&vdp->xdf_cb_lk); 2441 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2442 mutex_exit(&vdp->xdf_cb_lk); 2443 return (rv); 2444 } 2445 2446 /* 2447 * Release a media lock. Set the media state to "none". 2448 */ 2449 static int 2450 xdf_ioctl_munlock(xdf_t *vdp) 2451 { 2452 int rv; 2453 mutex_enter(&vdp->xdf_cb_lk); 2454 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2455 mutex_exit(&vdp->xdf_cb_lk); 2456 return (rv); 2457 } 2458 2459 /* 2460 * Eject the current media. Ignores any media locks. (Media locks 2461 * are only for benifit of the the backend.) 
2462 */ 2463 static int 2464 xdf_ioctl_eject(xdf_t *vdp) 2465 { 2466 int rv; 2467 2468 mutex_enter(&vdp->xdf_cb_lk); 2469 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2470 mutex_exit(&vdp->xdf_cb_lk); 2471 return (rv); 2472 } 2473 2474 /* 2475 * We've set the media requests xenbus parameter to eject, so now 2476 * disconnect from the backend, wait for the backend to clear 2477 * the media requets xenbus paramter, and then we can reconnect 2478 * to the backend. 2479 */ 2480 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2481 mutex_enter(&vdp->xdf_dev_lk); 2482 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2483 mutex_exit(&vdp->xdf_dev_lk); 2484 mutex_exit(&vdp->xdf_cb_lk); 2485 return (EIO); 2486 } 2487 mutex_exit(&vdp->xdf_dev_lk); 2488 mutex_exit(&vdp->xdf_cb_lk); 2489 return (0); 2490 } 2491 2492 /* 2493 * Watch for media state changes. This can be an insertion of a device 2494 * (triggered by a 'xm block-configure' request in another domain) or 2495 * the ejection of a device (triggered by a local "eject" operation). 2496 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 
2497 */ 2498 static int 2499 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2500 { 2501 enum dkio_state prev_state; 2502 2503 mutex_enter(&vdp->xdf_cb_lk); 2504 prev_state = vdp->xdf_mstate; 2505 2506 if (vdp->xdf_mstate == mstate) { 2507 while (vdp->xdf_mstate == prev_state) { 2508 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2509 &vdp->xdf_cb_lk) == 0) { 2510 mutex_exit(&vdp->xdf_cb_lk); 2511 return (EINTR); 2512 } 2513 } 2514 } 2515 2516 if ((prev_state != DKIO_INSERTED) && 2517 (vdp->xdf_mstate == DKIO_INSERTED)) { 2518 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2519 mutex_exit(&vdp->xdf_cb_lk); 2520 return (0); 2521 } 2522 2523 mutex_exit(&vdp->xdf_cb_lk); 2524 return (0); 2525 } 2526 2527 /*ARGSUSED*/ 2528 static int 2529 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2530 int *rvalp) 2531 { 2532 minor_t minor = getminor(dev); 2533 int part = XDF_PART(minor); 2534 xdf_t *vdp; 2535 int rv; 2536 2537 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2538 (!xdf_isopen(vdp, part))) 2539 return (ENXIO); 2540 2541 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2542 vdp->xdf_addr, cmd, cmd)); 2543 2544 switch (cmd) { 2545 default: 2546 return (ENOTTY); 2547 case DKIOCG_PHYGEOM: 2548 case DKIOCG_VIRTGEOM: 2549 case DKIOCGGEOM: 2550 case DKIOCSGEOM: 2551 case DKIOCGAPART: 2552 case DKIOCSAPART: 2553 case DKIOCGVTOC: 2554 case DKIOCSVTOC: 2555 case DKIOCPARTINFO: 2556 case DKIOCGEXTVTOC: 2557 case DKIOCSEXTVTOC: 2558 case DKIOCEXTPARTINFO: 2559 case DKIOCGMBOOT: 2560 case DKIOCSMBOOT: 2561 case DKIOCGETEFI: 2562 case DKIOCSETEFI: 2563 case DKIOCPARTITION: 2564 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2565 rvalp, NULL)); 2566 case FDEJECT: 2567 case DKIOCEJECT: 2568 case CDROMEJECT: 2569 return (xdf_ioctl_eject(vdp)); 2570 case DKIOCLOCK: 2571 return (xdf_ioctl_mlock(vdp)); 2572 case DKIOCUNLOCK: 2573 return (xdf_ioctl_munlock(vdp)); 2574 case CDROMREADOFFSET: { 2575 int offset = 0; 2576 if 
(!XD_IS_CD(vdp)) 2577 return (ENOTTY); 2578 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2579 return (EFAULT); 2580 return (0); 2581 } 2582 case DKIOCGMEDIAINFO: { 2583 struct dk_minfo media_info; 2584 2585 media_info.dki_lbsize = DEV_BSIZE; 2586 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2587 if (XD_IS_CD(vdp)) 2588 media_info.dki_media_type = DK_CDROM; 2589 else 2590 media_info.dki_media_type = DK_FIXED_DISK; 2591 2592 if (ddi_copyout(&media_info, (void *)arg, 2593 sizeof (struct dk_minfo), mode)) 2594 return (EFAULT); 2595 return (0); 2596 } 2597 case DKIOCINFO: { 2598 struct dk_cinfo info; 2599 2600 /* controller information */ 2601 if (XD_IS_CD(vdp)) 2602 info.dki_ctype = DKC_CDROM; 2603 else 2604 info.dki_ctype = DKC_VBD; 2605 2606 info.dki_cnum = 0; 2607 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2608 2609 /* unit information */ 2610 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2611 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2612 info.dki_flags = DKI_FMTVOL; 2613 info.dki_partition = part; 2614 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2615 info.dki_addr = 0; 2616 info.dki_space = 0; 2617 info.dki_prio = 0; 2618 info.dki_vec = 0; 2619 2620 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2621 return (EFAULT); 2622 return (0); 2623 } 2624 case DKIOCSTATE: { 2625 enum dkio_state mstate; 2626 2627 if (ddi_copyin((void *)arg, &mstate, 2628 sizeof (mstate), mode) != 0) 2629 return (EFAULT); 2630 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2631 return (rv); 2632 mstate = vdp->xdf_mstate; 2633 if (ddi_copyout(&mstate, (void *)arg, 2634 sizeof (mstate), mode) != 0) 2635 return (EFAULT); 2636 return (0); 2637 } 2638 case DKIOCREMOVABLE: { 2639 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2640 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2641 return (EFAULT); 2642 return (0); 2643 } 2644 case DKIOCGETWCE: { 2645 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2646 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode)) 2647 
return (EFAULT); 2648 return (0); 2649 } 2650 case DKIOCSETWCE: { 2651 int i; 2652 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2653 return (EFAULT); 2654 vdp->xdf_wce = VOID2BOOLEAN(i); 2655 return (0); 2656 } 2657 case DKIOCFLUSHWRITECACHE: { 2658 struct dk_callback *dkc = (struct dk_callback *)arg; 2659 2660 if (vdp->xdf_flush_supported) { 2661 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2662 NULL, 0, 0, (void *)dev); 2663 } else if (vdp->xdf_feature_barrier && 2664 !xdf_barrier_flush_disable) { 2665 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2666 vdp->xdf_cache_flush_block, xdf_flush_block, 2667 DEV_BSIZE, (void *)dev); 2668 } else { 2669 return (ENOTTY); 2670 } 2671 if ((mode & FKIOCTL) && (dkc != NULL) && 2672 (dkc->dkc_callback != NULL)) { 2673 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2674 /* need to return 0 after calling callback */ 2675 rv = 0; 2676 } 2677 return (rv); 2678 } 2679 } 2680 /*NOTREACHED*/ 2681 } 2682 2683 static int 2684 xdf_strategy(struct buf *bp) 2685 { 2686 xdf_t *vdp; 2687 minor_t minor; 2688 diskaddr_t p_blkct, p_blkst; 2689 ulong_t nblks; 2690 int part; 2691 2692 minor = getminor(bp->b_edev); 2693 part = XDF_PART(minor); 2694 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2695 2696 mutex_enter(&vdp->xdf_dev_lk); 2697 if (!xdf_isopen(vdp, part)) { 2698 mutex_exit(&vdp->xdf_dev_lk); 2699 xdf_io_err(bp, ENXIO, 0); 2700 return (0); 2701 } 2702 2703 /* We don't allow IO from the oe_change callback thread */ 2704 ASSERT(curthread != vdp->xdf_oe_change_thread); 2705 2706 /* Check for writes to a read only device */ 2707 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2708 mutex_exit(&vdp->xdf_dev_lk); 2709 xdf_io_err(bp, EROFS, 0); 2710 return (0); 2711 } 2712 2713 /* Check if this I/O is accessing a partition or the entire disk */ 2714 if ((long)bp->b_private == XB_SLICE_NONE) { 2715 /* This I/O is using an absolute offset */ 2716 p_blkct = vdp->xdf_xdev_nblocks; 2717 p_blkst = 0; 2718 } else { 2719 /* This I/O is using a partition relative 
offset */ 2720 mutex_exit(&vdp->xdf_dev_lk); 2721 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2722 &p_blkst, NULL, NULL, NULL)) { 2723 xdf_io_err(bp, ENXIO, 0); 2724 return (0); 2725 } 2726 mutex_enter(&vdp->xdf_dev_lk); 2727 } 2728 2729 /* check for a starting block beyond the disk or partition limit */ 2730 if (bp->b_blkno > p_blkct) { 2731 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2732 vdp->xdf_addr, (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 2733 xdf_io_err(bp, EINVAL, 0); 2734 return (0); 2735 } 2736 2737 /* Legacy: don't set error flag at this case */ 2738 if (bp->b_blkno == p_blkct) { 2739 bp->b_resid = bp->b_bcount; 2740 biodone(bp); 2741 return (0); 2742 } 2743 2744 /* sanitize the input buf */ 2745 bioerror(bp, 0); 2746 bp->b_resid = 0; 2747 bp->av_back = bp->av_forw = NULL; 2748 2749 /* Adjust for partial transfer, this will result in an error later */ 2750 nblks = bp->b_bcount >> XB_BSHIFT; 2751 if ((bp->b_blkno + nblks) > p_blkct) { 2752 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 2753 bp->b_bcount -= bp->b_resid; 2754 } 2755 2756 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2757 vdp->xdf_addr, (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 2758 2759 /* Fix up the buf struct */ 2760 bp->b_flags |= B_BUSY; 2761 bp->b_private = (void *)(uintptr_t)p_blkst; 2762 2763 xdf_bp_push(vdp, bp); 2764 mutex_exit(&vdp->xdf_dev_lk); 2765 xdf_io_start(vdp); 2766 if (do_polled_io) 2767 (void) xdf_ring_drain(vdp); 2768 return (0); 2769 } 2770 2771 /*ARGSUSED*/ 2772 static int 2773 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2774 { 2775 xdf_t *vdp; 2776 minor_t minor; 2777 diskaddr_t p_blkcnt; 2778 int part; 2779 2780 minor = getminor(dev); 2781 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2782 return (ENXIO); 2783 2784 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2785 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2786 2787 part = XDF_PART(minor); 2788 if 
(!xdf_isopen(vdp, part)) 2789 return (ENXIO); 2790 2791 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2792 NULL, NULL, NULL, NULL)) 2793 return (ENXIO); 2794 2795 if (U_INVAL(uiop)) 2796 return (EINVAL); 2797 2798 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2799 } 2800 2801 /*ARGSUSED*/ 2802 static int 2803 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2804 { 2805 xdf_t *vdp; 2806 minor_t minor; 2807 diskaddr_t p_blkcnt; 2808 int part; 2809 2810 minor = getminor(dev); 2811 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2812 return (ENXIO); 2813 2814 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2815 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2816 2817 part = XDF_PART(minor); 2818 if (!xdf_isopen(vdp, part)) 2819 return (ENXIO); 2820 2821 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2822 NULL, NULL, NULL, NULL)) 2823 return (ENXIO); 2824 2825 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 2826 return (ENOSPC); 2827 2828 if (U_INVAL(uiop)) 2829 return (EINVAL); 2830 2831 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2832 } 2833 2834 /*ARGSUSED*/ 2835 static int 2836 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2837 { 2838 xdf_t *vdp; 2839 minor_t minor; 2840 struct uio *uiop = aiop->aio_uio; 2841 diskaddr_t p_blkcnt; 2842 int part; 2843 2844 minor = getminor(dev); 2845 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2846 return (ENXIO); 2847 2848 part = XDF_PART(minor); 2849 if (!xdf_isopen(vdp, part)) 2850 return (ENXIO); 2851 2852 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2853 NULL, NULL, NULL, NULL)) 2854 return (ENXIO); 2855 2856 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 2857 return (ENOSPC); 2858 2859 if (U_INVAL(uiop)) 2860 return (EINVAL); 2861 2862 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2863 } 2864 2865 /*ARGSUSED*/ 2866 static int 2867 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2868 { 
2869 xdf_t *vdp; 2870 minor_t minor; 2871 struct uio *uiop = aiop->aio_uio; 2872 diskaddr_t p_blkcnt; 2873 int part; 2874 2875 minor = getminor(dev); 2876 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2877 return (ENXIO); 2878 2879 part = XDF_PART(minor); 2880 if (!xdf_isopen(vdp, part)) 2881 return (ENXIO); 2882 2883 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2884 NULL, NULL, NULL, NULL)) 2885 return (ENXIO); 2886 2887 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 2888 return (ENOSPC); 2889 2890 if (U_INVAL(uiop)) 2891 return (EINVAL); 2892 2893 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2894 } 2895 2896 static int 2897 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2898 { 2899 struct buf dumpbuf, *dbp = &dumpbuf; 2900 xdf_t *vdp; 2901 minor_t minor; 2902 int err = 0; 2903 int part; 2904 diskaddr_t p_blkcnt, p_blkst; 2905 2906 minor = getminor(dev); 2907 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2908 return (ENXIO); 2909 2910 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2911 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2912 2913 /* We don't allow IO from the oe_change callback thread */ 2914 ASSERT(curthread != vdp->xdf_oe_change_thread); 2915 2916 part = XDF_PART(minor); 2917 if (!xdf_isopen(vdp, part)) 2918 return (ENXIO); 2919 2920 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 2921 NULL, NULL, NULL)) 2922 return (ENXIO); 2923 2924 if ((blkno + nblk) > p_blkcnt) { 2925 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 2926 vdp->xdf_addr, blkno + nblk, (uint64_t)p_blkcnt); 2927 return (EINVAL); 2928 } 2929 2930 bioinit(dbp); 2931 dbp->b_flags = B_BUSY; 2932 dbp->b_un.b_addr = addr; 2933 dbp->b_bcount = nblk << DEV_BSHIFT; 2934 dbp->b_blkno = blkno; 2935 dbp->b_edev = dev; 2936 dbp->b_private = (void *)(uintptr_t)p_blkst; 2937 2938 mutex_enter(&vdp->xdf_dev_lk); 2939 xdf_bp_push(vdp, dbp); 2940 mutex_exit(&vdp->xdf_dev_lk); 
2941 xdf_io_start(vdp); 2942 err = xdf_ring_drain(vdp); 2943 biofini(dbp); 2944 return (err); 2945 } 2946 2947 /*ARGSUSED*/ 2948 static int 2949 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 2950 { 2951 minor_t minor; 2952 xdf_t *vdp; 2953 int part; 2954 ulong_t parbit; 2955 2956 minor = getminor(dev); 2957 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2958 return (ENXIO); 2959 2960 mutex_enter(&vdp->xdf_dev_lk); 2961 part = XDF_PART(minor); 2962 if (!xdf_isopen(vdp, part)) { 2963 mutex_exit(&vdp->xdf_dev_lk); 2964 return (ENXIO); 2965 } 2966 parbit = 1 << part; 2967 2968 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 2969 if (otyp == OTYP_LYR) { 2970 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 2971 if (--vdp->xdf_vd_lyropen[part] == 0) 2972 vdp->xdf_vd_open[otyp] &= ~parbit; 2973 } else { 2974 vdp->xdf_vd_open[otyp] &= ~parbit; 2975 } 2976 vdp->xdf_vd_exclopen &= ~parbit; 2977 2978 mutex_exit(&vdp->xdf_dev_lk); 2979 return (0); 2980 } 2981 2982 static int 2983 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 2984 { 2985 minor_t minor; 2986 xdf_t *vdp; 2987 int part; 2988 ulong_t parbit; 2989 diskaddr_t p_blkct = 0; 2990 boolean_t firstopen; 2991 boolean_t nodelay; 2992 2993 minor = getminor(*devp); 2994 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2995 return (ENXIO); 2996 2997 nodelay = (flag & (FNDELAY | FNONBLOCK)); 2998 2999 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3000 3001 /* do cv_wait until connected or failed */ 3002 mutex_enter(&vdp->xdf_cb_lk); 3003 mutex_enter(&vdp->xdf_dev_lk); 3004 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3005 mutex_exit(&vdp->xdf_dev_lk); 3006 mutex_exit(&vdp->xdf_cb_lk); 3007 return (ENXIO); 3008 } 3009 mutex_exit(&vdp->xdf_cb_lk); 3010 3011 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3012 mutex_exit(&vdp->xdf_dev_lk); 3013 return (EROFS); 3014 } 3015 3016 part = XDF_PART(minor); 3017 parbit = 1 << part; 3018 if ((vdp->xdf_vd_exclopen & 
parbit) || 3019 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3020 mutex_exit(&vdp->xdf_dev_lk); 3021 return (EBUSY); 3022 } 3023 3024 /* are we the first one to open this node? */ 3025 firstopen = !xdf_isopen(vdp, -1); 3026 3027 if (otyp == OTYP_LYR) 3028 vdp->xdf_vd_lyropen[part]++; 3029 3030 vdp->xdf_vd_open[otyp] |= parbit; 3031 3032 if (flag & FEXCL) 3033 vdp->xdf_vd_exclopen |= parbit; 3034 3035 mutex_exit(&vdp->xdf_dev_lk); 3036 3037 /* force a re-validation */ 3038 if (firstopen) 3039 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3040 3041 /* If this is a non-blocking open then we're done */ 3042 if (nodelay) 3043 return (0); 3044 3045 /* 3046 * This is a blocking open, so we require: 3047 * - that the disk have a valid label on it 3048 * - that the size of the partition that we're opening is non-zero 3049 */ 3050 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3051 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3052 (void) xdf_close(*devp, flag, otyp, credp); 3053 return (ENXIO); 3054 } 3055 3056 return (0); 3057 } 3058 3059 /*ARGSUSED*/ 3060 static void 3061 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3062 { 3063 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 3064 cv_broadcast(&vdp->xdf_hp_status_cv); 3065 } 3066 3067 static int 3068 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3069 char *name, caddr_t valuep, int *lengthp) 3070 { 3071 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3072 3073 /* 3074 * Sanity check that if a dev_t or dip were specified that they 3075 * correspond to this device driver. On debug kernels we'll 3076 * panic and on non-debug kernels we'll return failure. 
3077 */ 3078 ASSERT(ddi_driver_major(dip) == xdf_major); 3079 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3080 if ((ddi_driver_major(dip) != xdf_major) || 3081 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3082 return (DDI_PROP_NOT_FOUND); 3083 3084 if (vdp == NULL) 3085 return (ddi_prop_op(dev, dip, prop_op, flags, 3086 name, valuep, lengthp)); 3087 3088 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3089 dev, dip, prop_op, flags, name, valuep, lengthp, 3090 XDF_PART(getminor(dev)), NULL)); 3091 } 3092 3093 /*ARGSUSED*/ 3094 static int 3095 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3096 { 3097 int instance = XDF_INST(getminor((dev_t)arg)); 3098 xdf_t *vbdp; 3099 3100 switch (cmd) { 3101 case DDI_INFO_DEVT2DEVINFO: 3102 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3103 *rp = NULL; 3104 return (DDI_FAILURE); 3105 } 3106 *rp = vbdp->xdf_dip; 3107 return (DDI_SUCCESS); 3108 3109 case DDI_INFO_DEVT2INSTANCE: 3110 *rp = (void *)(uintptr_t)instance; 3111 return (DDI_SUCCESS); 3112 3113 default: 3114 return (DDI_FAILURE); 3115 } 3116 } 3117 3118 /*ARGSUSED*/ 3119 static int 3120 xdf_resume(dev_info_t *dip) 3121 { 3122 xdf_t *vdp; 3123 char *oename; 3124 3125 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3126 goto err; 3127 3128 if (xdf_debug & SUSRES_DBG) 3129 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3130 3131 mutex_enter(&vdp->xdf_cb_lk); 3132 3133 if (xvdi_resume(dip) != DDI_SUCCESS) { 3134 mutex_exit(&vdp->xdf_cb_lk); 3135 goto err; 3136 } 3137 3138 if (((oename = xvdi_get_oename(dip)) == NULL) || 3139 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3140 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3141 mutex_exit(&vdp->xdf_cb_lk); 3142 goto err; 3143 } 3144 3145 mutex_enter(&vdp->xdf_dev_lk); 3146 ASSERT(vdp->xdf_state != XD_READY); 3147 xdf_set_state(vdp, XD_UNKNOWN); 3148 mutex_exit(&vdp->xdf_dev_lk); 3149 3150 if (xdf_setstate_init(vdp) != 
DDI_SUCCESS) { 3151 mutex_exit(&vdp->xdf_cb_lk); 3152 goto err; 3153 } 3154 3155 mutex_exit(&vdp->xdf_cb_lk); 3156 3157 if (xdf_debug & SUSRES_DBG) 3158 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3159 return (DDI_SUCCESS); 3160 err: 3161 if (xdf_debug & SUSRES_DBG) 3162 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3163 return (DDI_FAILURE); 3164 } 3165 3166 static int 3167 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3168 { 3169 int n, instance = ddi_get_instance(dip); 3170 ddi_iblock_cookie_t ibc, softibc; 3171 boolean_t dev_iscd = B_FALSE; 3172 xdf_t *vdp; 3173 char *oename, *xsname, *str; 3174 3175 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3176 "xdf_debug", 0)) != 0) 3177 xdf_debug = n; 3178 3179 switch (cmd) { 3180 case DDI_RESUME: 3181 return (xdf_resume(dip)); 3182 case DDI_ATTACH: 3183 break; 3184 default: 3185 return (DDI_FAILURE); 3186 } 3187 /* DDI_ATTACH */ 3188 3189 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 3190 ((oename = xvdi_get_oename(dip)) == NULL)) 3191 return (DDI_FAILURE); 3192 3193 /* 3194 * Disable auto-detach. This is necessary so that we don't get 3195 * detached while we're disconnected from the back end. 
3196 */ 3197 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3198 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3199 return (DDI_FAILURE); 3200 3201 /* driver handles kernel-issued IOCTLs */ 3202 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3203 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3204 return (DDI_FAILURE); 3205 3206 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3207 return (DDI_FAILURE); 3208 3209 if (ddi_get_soft_iblock_cookie(dip, 3210 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3211 return (DDI_FAILURE); 3212 3213 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3214 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3215 ddi_get_name_addr(dip)); 3216 return (DDI_FAILURE); 3217 } 3218 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3219 dev_iscd = B_TRUE; 3220 strfree(str); 3221 3222 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3223 return (DDI_FAILURE); 3224 3225 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3226 vdp = ddi_get_soft_state(xdf_ssp, instance); 3227 ddi_set_driver_private(dip, vdp); 3228 vdp->xdf_dip = dip; 3229 vdp->xdf_addr = ddi_get_name_addr(dip); 3230 vdp->xdf_suspending = B_FALSE; 3231 vdp->xdf_media_req_supported = B_FALSE; 3232 vdp->xdf_peer = INVALID_DOMID; 3233 vdp->xdf_evtchn = INVALID_EVTCHN; 3234 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3235 offsetof(v_req_t, v_link)); 3236 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3237 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3238 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3239 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3240 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3241 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3242 vdp->xdf_cmbl_reattach = B_TRUE; 3243 if (dev_iscd) { 3244 vdp->xdf_dinfo |= VDISK_CDROM; 3245 vdp->xdf_mstate = DKIO_EJECTED; 3246 } else { 3247 vdp->xdf_mstate = DKIO_NONE; 3248 } 3249 3250 if 
((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3251 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3252 goto errout0; 3253 3254 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3255 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3256 goto errout0; 3257 3258 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3259 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3260 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3261 ddi_get_name_addr(dip)); 3262 goto errout0; 3263 } 3264 3265 /* 3266 * Initialize the physical geometry stucture. Note that currently 3267 * we don't know the size of the backend device so the number 3268 * of blocks on the device will be initialized to zero. Once 3269 * we connect to the backend device we'll update the physical 3270 * geometry to reflect the real size of the device. 3271 */ 3272 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3273 vdp->xdf_pgeom_fixed = B_FALSE; 3274 3275 /* 3276 * create default device minor nodes: non-removable disk 3277 * we will adjust minor nodes after we are connected w/ backend 3278 */ 3279 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3280 if (xdf_cmlb_attach(vdp) != 0) { 3281 cmn_err(CE_WARN, 3282 "xdf@%s: attach failed, cmlb attach failed", 3283 ddi_get_name_addr(dip)); 3284 goto errout0; 3285 } 3286 3287 /* 3288 * We ship with cache-enabled disks 3289 */ 3290 vdp->xdf_wce = B_TRUE; 3291 3292 mutex_enter(&vdp->xdf_cb_lk); 3293 /* Watch backend XenbusState change */ 3294 if (xvdi_add_event_handler(dip, 3295 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3296 mutex_exit(&vdp->xdf_cb_lk); 3297 goto errout0; 3298 } 3299 3300 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3301 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3302 ddi_get_name_addr(dip)); 3303 mutex_exit(&vdp->xdf_cb_lk); 3304 goto errout1; 3305 } 3306 mutex_exit(&vdp->xdf_cb_lk); 3307 3308 #if defined(XPV_HVM_DRIVER) 3309 3310 xdf_hvm_add(dip); 3311 3312 /* Report our version to dom0. 
*/ 3313 if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", 3314 HVMPV_XDF_VERS)) 3315 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3316 3317 #else /* !XPV_HVM_DRIVER */ 3318 3319 /* create kstat for iostat(1M) */ 3320 if (xdf_kstat_create(dip, "xdf", instance) != 0) { 3321 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3322 ddi_get_name_addr(dip)); 3323 goto errout1; 3324 } 3325 3326 #endif /* !XPV_HVM_DRIVER */ 3327 3328 ddi_report_dev(dip); 3329 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3330 return (DDI_SUCCESS); 3331 3332 errout1: 3333 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3334 xvdi_remove_event_handler(dip, XS_OE_STATE); 3335 errout0: 3336 if (vdp->xdf_vd_lbl != NULL) { 3337 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3338 cmlb_free_handle(&vdp->xdf_vd_lbl); 3339 vdp->xdf_vd_lbl = NULL; 3340 } 3341 if (vdp->xdf_softintr_id != NULL) 3342 ddi_remove_softintr(vdp->xdf_softintr_id); 3343 xvdi_remove_xb_watch_handlers(dip); 3344 if (vdp->xdf_ready_tq != NULL) 3345 ddi_taskq_destroy(vdp->xdf_ready_tq); 3346 mutex_destroy(&vdp->xdf_cb_lk); 3347 mutex_destroy(&vdp->xdf_dev_lk); 3348 cv_destroy(&vdp->xdf_dev_cv); 3349 cv_destroy(&vdp->xdf_hp_status_cv); 3350 ddi_soft_state_free(xdf_ssp, instance); 3351 ddi_set_driver_private(dip, NULL); 3352 ddi_prop_remove_all(dip); 3353 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3354 return (DDI_FAILURE); 3355 } 3356 3357 static int 3358 xdf_suspend(dev_info_t *dip) 3359 { 3360 int instance = ddi_get_instance(dip); 3361 xdf_t *vdp; 3362 3363 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3364 return (DDI_FAILURE); 3365 3366 if (xdf_debug & SUSRES_DBG) 3367 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3368 3369 xvdi_suspend(dip); 3370 3371 mutex_enter(&vdp->xdf_cb_lk); 3372 mutex_enter(&vdp->xdf_dev_lk); 3373 3374 vdp->xdf_suspending = B_TRUE; 3375 xdf_ring_destroy(vdp); 3376 xdf_set_state(vdp, XD_SUSPEND); 3377 vdp->xdf_suspending = 
B_FALSE; 3378 3379 mutex_exit(&vdp->xdf_dev_lk); 3380 mutex_exit(&vdp->xdf_cb_lk); 3381 3382 if (xdf_debug & SUSRES_DBG) 3383 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3384 3385 return (DDI_SUCCESS); 3386 } 3387 3388 static int 3389 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3390 { 3391 xdf_t *vdp; 3392 int instance; 3393 3394 switch (cmd) { 3395 3396 case DDI_PM_SUSPEND: 3397 break; 3398 3399 case DDI_SUSPEND: 3400 return (xdf_suspend(dip)); 3401 3402 case DDI_DETACH: 3403 break; 3404 3405 default: 3406 return (DDI_FAILURE); 3407 } 3408 3409 instance = ddi_get_instance(dip); 3410 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3411 vdp = ddi_get_soft_state(xdf_ssp, instance); 3412 3413 if (vdp == NULL) 3414 return (DDI_FAILURE); 3415 3416 mutex_enter(&vdp->xdf_cb_lk); 3417 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3418 if (vdp->xdf_state != XD_CLOSED) { 3419 mutex_exit(&vdp->xdf_cb_lk); 3420 return (DDI_FAILURE); 3421 } 3422 mutex_exit(&vdp->xdf_cb_lk); 3423 3424 ASSERT(!ISDMACBON(vdp)); 3425 3426 #if defined(XPV_HVM_DRIVER) 3427 xdf_hvm_rm(dip); 3428 #endif /* XPV_HVM_DRIVER */ 3429 3430 if (vdp->xdf_timeout_id != 0) 3431 (void) untimeout(vdp->xdf_timeout_id); 3432 3433 xvdi_remove_event_handler(dip, XS_OE_STATE); 3434 ddi_taskq_destroy(vdp->xdf_ready_tq); 3435 3436 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3437 cmlb_free_handle(&vdp->xdf_vd_lbl); 3438 3439 /* we'll support backend running in domU later */ 3440 #ifdef DOMU_BACKEND 3441 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3442 #endif 3443 3444 list_destroy(&vdp->xdf_vreq_act); 3445 ddi_prop_remove_all(dip); 3446 xdf_kstat_delete(dip); 3447 ddi_remove_softintr(vdp->xdf_softintr_id); 3448 xvdi_remove_xb_watch_handlers(dip); 3449 ddi_set_driver_private(dip, NULL); 3450 cv_destroy(&vdp->xdf_dev_cv); 3451 mutex_destroy(&vdp->xdf_cb_lk); 3452 mutex_destroy(&vdp->xdf_dev_lk); 3453 if (vdp->xdf_cache_flush_block != NULL) 3454 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 3455 
ddi_soft_state_free(xdf_ssp, instance); 3456 return (DDI_SUCCESS); 3457 } 3458 3459 /* 3460 * Driver linkage structures. 3461 */ 3462 static struct cb_ops xdf_cbops = { 3463 xdf_open, 3464 xdf_close, 3465 xdf_strategy, 3466 nodev, 3467 xdf_dump, 3468 xdf_read, 3469 xdf_write, 3470 xdf_ioctl, 3471 nodev, 3472 nodev, 3473 nodev, 3474 nochpoll, 3475 xdf_prop_op, 3476 NULL, 3477 D_MP | D_NEW | D_64BIT, 3478 CB_REV, 3479 xdf_aread, 3480 xdf_awrite 3481 }; 3482 3483 struct dev_ops xdf_devops = { 3484 DEVO_REV, /* devo_rev */ 3485 0, /* devo_refcnt */ 3486 xdf_getinfo, /* devo_getinfo */ 3487 nulldev, /* devo_identify */ 3488 nulldev, /* devo_probe */ 3489 xdf_attach, /* devo_attach */ 3490 xdf_detach, /* devo_detach */ 3491 nodev, /* devo_reset */ 3492 &xdf_cbops, /* devo_cb_ops */ 3493 NULL, /* devo_bus_ops */ 3494 NULL, /* devo_power */ 3495 ddi_quiesce_not_supported, /* devo_quiesce */ 3496 }; 3497 3498 /* 3499 * Module linkage structures. 3500 */ 3501 static struct modldrv modldrv = { 3502 &mod_driverops, /* Type of module. 
This one is a driver */ 3503 "virtual block driver", /* short description */ 3504 &xdf_devops /* driver specific ops */ 3505 }; 3506 3507 static struct modlinkage xdf_modlinkage = { 3508 MODREV_1, (void *)&modldrv, NULL 3509 }; 3510 3511 /* 3512 * standard module entry points 3513 */ 3514 int 3515 _init(void) 3516 { 3517 int rc; 3518 3519 xdf_major = ddi_name_to_major("xdf"); 3520 if (xdf_major == (major_t)-1) 3521 return (EINVAL); 3522 3523 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3524 return (rc); 3525 3526 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3527 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3528 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3529 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3530 3531 #if defined(XPV_HVM_DRIVER) 3532 xdf_hvm_init(); 3533 #endif /* XPV_HVM_DRIVER */ 3534 3535 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3536 #if defined(XPV_HVM_DRIVER) 3537 xdf_hvm_fini(); 3538 #endif /* XPV_HVM_DRIVER */ 3539 kmem_cache_destroy(xdf_vreq_cache); 3540 kmem_cache_destroy(xdf_gs_cache); 3541 ddi_soft_state_fini(&xdf_ssp); 3542 return (rc); 3543 } 3544 3545 return (rc); 3546 } 3547 3548 int 3549 _fini(void) 3550 { 3551 int err; 3552 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3553 return (err); 3554 3555 #if defined(XPV_HVM_DRIVER) 3556 xdf_hvm_fini(); 3557 #endif /* XPV_HVM_DRIVER */ 3558 3559 kmem_cache_destroy(xdf_vreq_cache); 3560 kmem_cache_destroy(xdf_gs_cache); 3561 ddi_soft_state_fini(&xdf_ssp); 3562 3563 return (0); 3564 } 3565 3566 int 3567 _info(struct modinfo *modinfop) 3568 { 3569 return (mod_info(&xdf_modlinkage, modinfop)); 3570 } 3571