1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * xdf.c - Xen Virtual Block Device Driver 29 * TODO: 30 * - support alternate block size (currently only DEV_BSIZE supported) 31 * - revalidate geometry for removable devices 32 * 33 * This driver export solaris disk device nodes, accepts IO requests from 34 * those nodes, and services those requests by talking to a backend device 35 * in another domain. 36 * 37 * Communication with the backend device is done via a ringbuffer (which is 38 * managed via xvdi interfaces) and dma memory (which is managed via ddi 39 * interfaces). 40 * 41 * Communication with the backend device is dependant upon establishing a 42 * connection to the backend device. This connection process involves 43 * reading device configuration information from xenbus and publishing 44 * some frontend runtime configuration parameters via the xenbus (for 45 * consumption by the backend). 
Once we've published runtime configuration 46 * information via the xenbus, the backend device can enter the connected 47 * state and we'll enter the XD_CONNECTED state. But before we can allow 48 * random IO to begin, we need to do IO to the backend device to determine 49 * the device label and if flush operations are supported. Once this is 50 * done we enter the XD_READY state and can process any IO operations. 51 * 52 * We receive notifications of xenbus state changes for the backend device 53 * (aka, the "other end") via the xdf_oe_change() callback. This callback 54 * is single threaded, meaning that we can't receive new notification of 55 * other end state changes while we're processing an outstanding 56 * notification of an other end state change. Therefore we can't do any 57 * blocking operations from the xdf_oe_change() callback. This is why we 58 * have a separate taskq (xdf_ready_tq) which exists to do the necessary 59 * IO to get us from the XD_CONNECTED to the XD_READY state. All IO 60 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go 61 * through xdf_lb_rdwr(), which is a synchronous IO interface. IOs 62 * generated by the xdf_ready_tq_thread thread have priority over all 63 * other IO requests. 64 * 65 * We also communicate with the backend device via the xenbus "media-req" 66 * (XBP_MEDIA_REQ) property. For more information on this see the 67 * comments in blkif.h. 
68 */ 69 70 #include <io/xdf.h> 71 72 #include <sys/conf.h> 73 #include <sys/dkio.h> 74 #include <sys/promif.h> 75 #include <sys/sysmacros.h> 76 #include <sys/kstat.h> 77 #include <sys/mach_mmu.h> 78 #ifdef XPV_HVM_DRIVER 79 #include <sys/xpv_support.h> 80 #include <sys/sunndi.h> 81 #else /* !XPV_HVM_DRIVER */ 82 #include <sys/evtchn_impl.h> 83 #endif /* !XPV_HVM_DRIVER */ 84 #include <public/io/xenbus.h> 85 #include <xen/sys/xenbus_impl.h> 86 #include <sys/scsi/generic/inquiry.h> 87 #include <xen/io/blkif_impl.h> 88 #include <sys/fdio.h> 89 #include <sys/cdio.h> 90 91 /* 92 * DEBUG_EVAL can be used to include debug only statements without 93 * having to use '#ifdef DEBUG' statements 94 */ 95 #ifdef DEBUG 96 #define DEBUG_EVAL(x) (x) 97 #else /* !DEBUG */ 98 #define DEBUG_EVAL(x) 99 #endif /* !DEBUG */ 100 101 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ 102 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ 103 104 #define INVALID_DOMID ((domid_t)-1) 105 #define FLUSH_DISKCACHE 0x1 106 #define WRITE_BARRIER 0x2 107 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 108 #define USE_WRITE_BARRIER(vdp) \ 109 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 110 #define USE_FLUSH_DISKCACHE(vdp) \ 111 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 112 #define IS_WRITE_BARRIER(vdp, bp) \ 113 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 114 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 115 #define IS_FLUSH_DISKCACHE(bp) \ 116 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 117 118 #define VREQ_DONE(vreq) \ 119 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ 120 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ 121 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) 122 123 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) 124 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) 125 126 extern int do_polled_io; 127 128 /* run-time tunables that we don't want the compiler to 
optimize away */ 129 volatile int xdf_debug = 0; 130 volatile boolean_t xdf_barrier_flush_disable = B_FALSE; 131 132 /* per module globals */ 133 major_t xdf_major; 134 static void *xdf_ssp; 135 static kmem_cache_t *xdf_vreq_cache; 136 static kmem_cache_t *xdf_gs_cache; 137 static int xdf_maxphys = XB_MAXPHYS; 138 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 139 static int xdf_fbrewrites; /* flush block re-write count */ 140 141 /* misc public functions (used by xdf_shell.c) */ 142 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 143 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 144 145 /* misc private functions */ 146 static void xdf_io_start(xdf_t *); 147 148 /* callbacks from commmon label */ 149 static cmlb_tg_ops_t xdf_lb_ops = { 150 TG_DK_OPS_VERSION_1, 151 xdf_lb_rdwr, 152 xdf_lb_getinfo 153 }; 154 155 /* 156 * I/O buffer DMA attributes 157 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 158 */ 159 static ddi_dma_attr_t xb_dma_attr = { 160 DMA_ATTR_V0, 161 (uint64_t)0, /* lowest address */ 162 (uint64_t)0xffffffffffffffff, /* highest usable address */ 163 (uint64_t)0xffffff, /* DMA counter limit max */ 164 (uint64_t)XB_BSIZE, /* alignment in bytes */ 165 XB_BSIZE - 1, /* bitmap of burst sizes */ 166 XB_BSIZE, /* min transfer */ 167 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 168 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 169 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 170 XB_BSIZE, /* granularity */ 171 0, /* flags (reserved) */ 172 }; 173 174 static ddi_device_acc_attr_t xc_acc_attr = { 175 DDI_DEVICE_ATTR_V0, 176 DDI_NEVERSWAP_ACC, 177 DDI_STRICTORDER_ACC 178 }; 179 180 static void 181 xdf_timeout_handler(void *arg) 182 { 183 xdf_t *vdp = arg; 184 185 mutex_enter(&vdp->xdf_dev_lk); 186 vdp->xdf_timeout_id = 0; 187 mutex_exit(&vdp->xdf_dev_lk); 188 189 /* new timeout thread could be re-scheduled */ 190 xdf_io_start(vdp); 191 } 192 193 /* 194 * callback 
func when DMA/GTE resources is available 195 * 196 * Note: we only register one callback function to grant table subsystem 197 * since we only have one 'struct gnttab_free_callback' in xdf_t. 198 */ 199 static int 200 xdf_dmacallback(caddr_t arg) 201 { 202 xdf_t *vdp = (xdf_t *)arg; 203 ASSERT(vdp != NULL); 204 205 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 206 vdp->xdf_addr)); 207 208 ddi_trigger_softintr(vdp->xdf_softintr_id); 209 return (DDI_DMA_CALLBACK_DONE); 210 } 211 212 static ge_slot_t * 213 gs_get(xdf_t *vdp, int isread) 214 { 215 grant_ref_t gh; 216 ge_slot_t *gs; 217 218 /* try to alloc GTEs needed in this slot, first */ 219 if (gnttab_alloc_grant_references( 220 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 221 if (vdp->xdf_gnt_callback.next == NULL) { 222 SETDMACBON(vdp); 223 gnttab_request_free_callback( 224 &vdp->xdf_gnt_callback, 225 (void (*)(void *))xdf_dmacallback, 226 (void *)vdp, 227 BLKIF_MAX_SEGMENTS_PER_REQUEST); 228 } 229 return (NULL); 230 } 231 232 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 233 if (gs == NULL) { 234 gnttab_free_grant_references(gh); 235 if (vdp->xdf_timeout_id == 0) 236 /* restart I/O after one second */ 237 vdp->xdf_timeout_id = 238 timeout(xdf_timeout_handler, vdp, hz); 239 return (NULL); 240 } 241 242 /* init gs_slot */ 243 gs->gs_oeid = vdp->xdf_peer; 244 gs->gs_isread = isread; 245 gs->gs_ghead = gh; 246 gs->gs_ngrefs = 0; 247 248 return (gs); 249 } 250 251 static void 252 gs_free(ge_slot_t *gs) 253 { 254 int i; 255 256 /* release all grant table entry resources used in this slot */ 257 for (i = 0; i < gs->gs_ngrefs; i++) 258 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 259 gnttab_free_grant_references(gs->gs_ghead); 260 list_remove(&gs->gs_vreq->v_gs, gs); 261 kmem_cache_free(xdf_gs_cache, gs); 262 } 263 264 static grant_ref_t 265 gs_grant(ge_slot_t *gs, mfn_t mfn) 266 { 267 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); 268 269 ASSERT(gr != -1); 270 ASSERT(gs->gs_ngrefs 
< BLKIF_MAX_SEGMENTS_PER_REQUEST); 271 gs->gs_ge[gs->gs_ngrefs++] = gr; 272 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 273 274 return (gr); 275 } 276 277 /* 278 * Alloc a vreq for this bp 279 * bp->av_back contains the pointer to the vreq upon return 280 */ 281 static v_req_t * 282 vreq_get(xdf_t *vdp, buf_t *bp) 283 { 284 v_req_t *vreq = NULL; 285 286 ASSERT(BP_VREQ(bp) == NULL); 287 288 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 289 if (vreq == NULL) { 290 if (vdp->xdf_timeout_id == 0) 291 /* restart I/O after one second */ 292 vdp->xdf_timeout_id = 293 timeout(xdf_timeout_handler, vdp, hz); 294 return (NULL); 295 } 296 bzero(vreq, sizeof (v_req_t)); 297 list_create(&vreq->v_gs, sizeof (ge_slot_t), 298 offsetof(ge_slot_t, gs_vreq_link)); 299 vreq->v_buf = bp; 300 vreq->v_status = VREQ_INIT; 301 vreq->v_runq = B_FALSE; 302 BP_VREQ_SET(bp, vreq); 303 /* init of other fields in vreq is up to the caller */ 304 305 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 306 307 return (vreq); 308 } 309 310 static void 311 vreq_free(xdf_t *vdp, v_req_t *vreq) 312 { 313 buf_t *bp = vreq->v_buf; 314 315 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 316 ASSERT(BP_VREQ(bp) == vreq); 317 318 list_remove(&vdp->xdf_vreq_act, vreq); 319 320 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 321 goto done; 322 323 switch (vreq->v_status) { 324 case VREQ_DMAWIN_DONE: 325 case VREQ_GS_ALLOCED: 326 case VREQ_DMABUF_BOUND: 327 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 328 /*FALLTHRU*/ 329 case VREQ_DMAMEM_ALLOCED: 330 if (!ALIGNED_XFER(bp)) { 331 ASSERT(vreq->v_abuf != NULL); 332 if (!IS_ERROR(bp) && IS_READ(bp)) 333 bcopy(vreq->v_abuf, bp->b_un.b_addr, 334 bp->b_bcount); 335 ddi_dma_mem_free(&vreq->v_align); 336 } 337 /*FALLTHRU*/ 338 case VREQ_MEMDMAHDL_ALLOCED: 339 if (!ALIGNED_XFER(bp)) 340 ddi_dma_free_handle(&vreq->v_memdmahdl); 341 /*FALLTHRU*/ 342 case VREQ_DMAHDL_ALLOCED: 343 ddi_dma_free_handle(&vreq->v_dmahdl); 344 break; 345 default: 346 
break; 347 } 348 done: 349 ASSERT(!vreq->v_runq); 350 list_destroy(&vreq->v_gs); 351 kmem_cache_free(xdf_vreq_cache, vreq); 352 } 353 354 /* 355 * Snarf new data if our flush block was re-written 356 */ 357 static void 358 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 359 { 360 int nblks; 361 boolean_t mapin; 362 363 if (IS_WRITE_BARRIER(vdp, bp)) 364 return; /* write was a flush write */ 365 366 mapin = B_FALSE; 367 nblks = bp->b_bcount >> DEV_BSHIFT; 368 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 369 xdf_fbrewrites++; 370 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 371 mapin = B_TRUE; 372 bp_mapin(bp); 373 } 374 bcopy(bp->b_un.b_addr + 375 ((xdf_flush_block - blkno) << DEV_BSHIFT), 376 vdp->xdf_cache_flush_block, DEV_BSIZE); 377 if (mapin) 378 bp_mapout(bp); 379 } 380 } 381 382 /* 383 * Initalize the DMA and grant table resources for the buf 384 */ 385 static int 386 vreq_setup(xdf_t *vdp, v_req_t *vreq) 387 { 388 int rc; 389 ddi_dma_attr_t dmaattr; 390 uint_t ndcs, ndws; 391 ddi_dma_handle_t dh; 392 ddi_dma_handle_t mdh; 393 ddi_dma_cookie_t dc; 394 ddi_acc_handle_t abh; 395 caddr_t aba; 396 ge_slot_t *gs; 397 size_t bufsz; 398 off_t off; 399 size_t sz; 400 buf_t *bp = vreq->v_buf; 401 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 402 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 403 404 switch (vreq->v_status) { 405 case VREQ_INIT: 406 if (IS_FLUSH_DISKCACHE(bp)) { 407 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 408 DPRINTF(DMA_DBG, ("xdf@%s: " 409 "get ge_slotfailed\n", vdp->xdf_addr)); 410 return (DDI_FAILURE); 411 } 412 vreq->v_blkno = 0; 413 vreq->v_nslots = 1; 414 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 415 vreq->v_status = VREQ_GS_ALLOCED; 416 gs->gs_vreq = vreq; 417 list_insert_head(&vreq->v_gs, gs); 418 return (DDI_SUCCESS); 419 } 420 421 if (IS_WRITE_BARRIER(vdp, bp)) 422 vreq->v_flush_diskcache = WRITE_BARRIER; 423 vreq->v_blkno = bp->b_blkno + 424 (diskaddr_t)(uintptr_t)bp->b_private; 425 /* See if we wrote new data to our flush block */ 426 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 427 check_fbwrite(vdp, bp, vreq->v_blkno); 428 vreq->v_status = VREQ_INIT_DONE; 429 /*FALLTHRU*/ 430 431 case VREQ_INIT_DONE: 432 /* 433 * alloc DMA handle 434 */ 435 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 436 xdf_dmacallback, (caddr_t)vdp, &dh); 437 if (rc != DDI_SUCCESS) { 438 SETDMACBON(vdp); 439 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 440 vdp->xdf_addr)); 441 return (DDI_FAILURE); 442 } 443 444 vreq->v_dmahdl = dh; 445 vreq->v_status = VREQ_DMAHDL_ALLOCED; 446 /*FALLTHRU*/ 447 448 case VREQ_DMAHDL_ALLOCED: 449 /* 450 * alloc dma handle for 512-byte aligned buf 451 */ 452 if (!ALIGNED_XFER(bp)) { 453 /* 454 * XXPV: we need to temporarily enlarge the seg 455 * boundary and s/g length to work round CR6381968 456 */ 457 dmaattr = xb_dma_attr; 458 dmaattr.dma_attr_seg = (uint64_t)-1; 459 dmaattr.dma_attr_sgllen = INT_MAX; 460 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 461 xdf_dmacallback, (caddr_t)vdp, &mdh); 462 if (rc != DDI_SUCCESS) { 463 SETDMACBON(vdp); 464 DPRINTF(DMA_DBG, ("xdf@%s: " 465 "unaligned buf DMAhandle alloc failed\n", 466 vdp->xdf_addr)); 467 return (DDI_FAILURE); 468 } 469 vreq->v_memdmahdl = mdh; 470 vreq->v_status 
= VREQ_MEMDMAHDL_ALLOCED; 471 } 472 /*FALLTHRU*/ 473 474 case VREQ_MEMDMAHDL_ALLOCED: 475 /* 476 * alloc 512-byte aligned buf 477 */ 478 if (!ALIGNED_XFER(bp)) { 479 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 480 bp_mapin(bp); 481 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 482 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 483 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 484 &aba, &bufsz, &abh); 485 if (rc != DDI_SUCCESS) { 486 SETDMACBON(vdp); 487 DPRINTF(DMA_DBG, ("xdf@%s: " 488 "DMA mem allocation failed\n", 489 vdp->xdf_addr)); 490 return (DDI_FAILURE); 491 } 492 493 vreq->v_abuf = aba; 494 vreq->v_align = abh; 495 vreq->v_status = VREQ_DMAMEM_ALLOCED; 496 497 ASSERT(bufsz >= bp->b_bcount); 498 if (!IS_READ(bp)) 499 bcopy(bp->b_un.b_addr, vreq->v_abuf, 500 bp->b_bcount); 501 } 502 /*FALLTHRU*/ 503 504 case VREQ_DMAMEM_ALLOCED: 505 /* 506 * dma bind 507 */ 508 if (ALIGNED_XFER(bp)) { 509 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 510 dma_flags, xdf_dmacallback, (caddr_t)vdp, 511 &dc, &ndcs); 512 } else { 513 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 514 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 515 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 516 } 517 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 518 /* get num of dma windows */ 519 if (rc == DDI_DMA_PARTIAL_MAP) { 520 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 521 ASSERT(rc == DDI_SUCCESS); 522 } else { 523 ndws = 1; 524 } 525 } else { 526 SETDMACBON(vdp); 527 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 528 vdp->xdf_addr)); 529 return (DDI_FAILURE); 530 } 531 532 vreq->v_dmac = dc; 533 vreq->v_dmaw = 0; 534 vreq->v_ndmacs = ndcs; 535 vreq->v_ndmaws = ndws; 536 vreq->v_nslots = ndws; 537 vreq->v_status = VREQ_DMABUF_BOUND; 538 /*FALLTHRU*/ 539 540 case VREQ_DMABUF_BOUND: 541 /* 542 * get ge_slot, callback is set upon failure from gs_get(), 543 * if not set previously 544 */ 545 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 546 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 547 
vdp->xdf_addr)); 548 return (DDI_FAILURE); 549 } 550 551 vreq->v_status = VREQ_GS_ALLOCED; 552 gs->gs_vreq = vreq; 553 list_insert_head(&vreq->v_gs, gs); 554 break; 555 556 case VREQ_GS_ALLOCED: 557 /* nothing need to be done */ 558 break; 559 560 case VREQ_DMAWIN_DONE: 561 /* 562 * move to the next dma window 563 */ 564 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 565 566 /* get a ge_slot for this DMA window */ 567 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 568 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 569 vdp->xdf_addr)); 570 return (DDI_FAILURE); 571 } 572 573 vreq->v_dmaw++; 574 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 575 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 576 vreq->v_status = VREQ_GS_ALLOCED; 577 gs->gs_vreq = vreq; 578 list_insert_head(&vreq->v_gs, gs); 579 break; 580 581 default: 582 return (DDI_FAILURE); 583 } 584 585 return (DDI_SUCCESS); 586 } 587 588 static int 589 xdf_cmlb_attach(xdf_t *vdp) 590 { 591 dev_info_t *dip = vdp->xdf_dip; 592 593 return (cmlb_attach(dip, &xdf_lb_ops, 594 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 595 XD_IS_RM(vdp), 596 B_TRUE, 597 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 598 #if defined(XPV_HVM_DRIVER) 599 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) | 600 CMLB_INTERNAL_MINOR_NODES, 601 #else /* !XPV_HVM_DRIVER */ 602 XD_IS_CD(vdp) ? 
0 : CMLB_FAKE_LABEL_ONE_PARTITION, 603 #endif /* !XPV_HVM_DRIVER */ 604 vdp->xdf_vd_lbl, NULL)); 605 } 606 607 static void 608 xdf_io_err(buf_t *bp, int err, size_t resid) 609 { 610 bioerror(bp, err); 611 if (resid == 0) 612 bp->b_resid = bp->b_bcount; 613 biodone(bp); 614 } 615 616 static void 617 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 618 { 619 v_req_t *vreq = BP_VREQ(bp); 620 621 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 622 623 if (vdp->xdf_xdev_iostat == NULL) 624 return; 625 if ((vreq != NULL) && vreq->v_runq) { 626 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 627 } else { 628 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 629 } 630 } 631 632 static void 633 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 634 { 635 v_req_t *vreq = BP_VREQ(bp); 636 637 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 638 639 if (vdp->xdf_xdev_iostat == NULL) 640 return; 641 642 if ((vreq != NULL) && vreq->v_runq) { 643 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 644 } else { 645 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 646 } 647 648 if (bp->b_flags & B_READ) { 649 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++; 650 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount; 651 } else if (bp->b_flags & B_WRITE) { 652 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++; 653 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount; 654 } 655 } 656 657 static void 658 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 659 { 660 v_req_t *vreq = BP_VREQ(bp); 661 662 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 663 ASSERT(!vreq->v_runq); 664 665 vreq->v_runq = B_TRUE; 666 if (vdp->xdf_xdev_iostat == NULL) 667 return; 668 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 669 } 670 671 static void 672 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 673 { 674 v_req_t *vreq = BP_VREQ(bp); 675 676 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 677 ASSERT(vreq->v_runq); 678 679 vreq->v_runq = B_FALSE; 680 if (vdp->xdf_xdev_iostat == NULL) 681 return; 682 
kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 683 } 684 685 int 686 xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance) 687 { 688 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 689 kstat_t *kstat; 690 buf_t *bp; 691 692 if ((kstat = kstat_create( 693 ks_module, instance, NULL, "disk", 694 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) 695 return (-1); 696 697 /* See comment about locking in xdf_kstat_delete(). */ 698 mutex_enter(&vdp->xdf_iostat_lk); 699 mutex_enter(&vdp->xdf_dev_lk); 700 701 /* only one kstat can exist at a time */ 702 if (vdp->xdf_xdev_iostat != NULL) { 703 mutex_exit(&vdp->xdf_dev_lk); 704 mutex_exit(&vdp->xdf_iostat_lk); 705 kstat_delete(kstat); 706 return (-1); 707 } 708 709 vdp->xdf_xdev_iostat = kstat; 710 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 711 kstat_install(vdp->xdf_xdev_iostat); 712 713 /* 714 * Now that we've created a kstat, we need to update the waitq and 715 * runq counts for the kstat to reflect our current state. 716 * 717 * For a buf_t structure to be on the runq, it must have a ring 718 * buffer slot associated with it. To get a ring buffer slot the 719 * buf must first have a v_req_t and a ge_slot_t associated with it. 720 * Then when it is granted a ring buffer slot, v_runq will be set to 721 * true. 722 * 723 * For a buf_t structure to be on the waitq, it must not be on the 724 * runq. So to find all the buf_t's that should be on waitq, we 725 * walk the active buf list and add any buf_t's which aren't on the 726 * runq to the waitq. 
727 */ 728 bp = vdp->xdf_f_act; 729 while (bp != NULL) { 730 xdf_kstat_enter(vdp, bp); 731 bp = bp->av_forw; 732 } 733 if (vdp->xdf_ready_tq_bp != NULL) 734 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 735 736 mutex_exit(&vdp->xdf_dev_lk); 737 mutex_exit(&vdp->xdf_iostat_lk); 738 return (0); 739 } 740 741 void 742 xdf_kstat_delete(dev_info_t *dip) 743 { 744 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 745 kstat_t *kstat; 746 buf_t *bp; 747 748 /* 749 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 750 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 751 * and the contents of the our kstat. xdf_iostat_lk is used 752 * to protect the allocation and freeing of the actual kstat. 753 * xdf_dev_lk can't be used for this purpose because kstat 754 * readers use it to access the contents of the kstat and 755 * hence it can't be held when calling kstat_delete(). 756 */ 757 mutex_enter(&vdp->xdf_iostat_lk); 758 mutex_enter(&vdp->xdf_dev_lk); 759 760 if (vdp->xdf_xdev_iostat == NULL) { 761 mutex_exit(&vdp->xdf_dev_lk); 762 mutex_exit(&vdp->xdf_iostat_lk); 763 return; 764 } 765 766 /* 767 * We're about to destroy the kstat structures, so it isn't really 768 * necessary to update the runq and waitq counts. But, since this 769 * isn't a hot code path we can afford to be a little pedantic and 770 * go ahead and decrement the runq and waitq kstat counters to zero 771 * before free'ing them. This helps us ensure that we've gotten all 772 * our accounting correct. 773 * 774 * For an explanation of how we determine which buffers go on the 775 * runq vs which go on the waitq, see the comments in 776 * xdf_kstat_create(). 
777 */ 778 bp = vdp->xdf_f_act; 779 while (bp != NULL) { 780 xdf_kstat_exit(vdp, bp); 781 bp = bp->av_forw; 782 } 783 if (vdp->xdf_ready_tq_bp != NULL) 784 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp); 785 786 kstat = vdp->xdf_xdev_iostat; 787 vdp->xdf_xdev_iostat = NULL; 788 mutex_exit(&vdp->xdf_dev_lk); 789 kstat_delete(kstat); 790 mutex_exit(&vdp->xdf_iostat_lk); 791 } 792 793 /* 794 * Add an IO requests onto the active queue. 795 * 796 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs 797 * are used to establish a connection to the backend, so they recieve 798 * priority over all other IOs. Since xdf_ready_tq_thread only does 799 * synchronous IO, there can only be one xdf_ready_tq_thread request at any 800 * given time and we record the buf associated with that request in 801 * xdf_ready_tq_bp. 802 */ 803 static void 804 xdf_bp_push(xdf_t *vdp, buf_t *bp) 805 { 806 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 807 ASSERT(bp->av_forw == NULL); 808 809 xdf_kstat_enter(vdp, bp); 810 811 if (curthread == vdp->xdf_ready_tq_thread) { 812 /* new IO requests from the ready thread */ 813 ASSERT(vdp->xdf_ready_tq_bp == NULL); 814 vdp->xdf_ready_tq_bp = bp; 815 return; 816 } 817 818 /* this is normal IO request */ 819 ASSERT(bp != vdp->xdf_ready_tq_bp); 820 821 if (vdp->xdf_f_act == NULL) { 822 /* this is only only IO on the active queue */ 823 ASSERT(vdp->xdf_l_act == NULL); 824 ASSERT(vdp->xdf_i_act == NULL); 825 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; 826 return; 827 } 828 829 /* add this IO to the tail of the active queue */ 830 vdp->xdf_l_act->av_forw = bp; 831 vdp->xdf_l_act = bp; 832 if (vdp->xdf_i_act == NULL) 833 vdp->xdf_i_act = bp; 834 } 835 836 static void 837 xdf_bp_pop(xdf_t *vdp, buf_t *bp) 838 { 839 buf_t *bp_iter; 840 841 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 842 ASSERT(VREQ_DONE(BP_VREQ(bp))); 843 844 if (vdp->xdf_ready_tq_bp == bp) { 845 /* we're done with a ready thread IO request */ 846 ASSERT(bp->av_forw == NULL); 847 
vdp->xdf_ready_tq_bp = NULL; 848 return; 849 } 850 851 /* we're done with a normal IO request */ 852 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); 853 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act)); 854 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); 855 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); 856 857 if (bp == vdp->xdf_f_act) { 858 /* This IO was at the head of our active queue. */ 859 vdp->xdf_f_act = bp->av_forw; 860 if (bp == vdp->xdf_l_act) 861 vdp->xdf_l_act = NULL; 862 } else { 863 /* There IO finished before some other pending IOs. */ 864 bp_iter = vdp->xdf_f_act; 865 while (bp != bp_iter->av_forw) { 866 bp_iter = bp_iter->av_forw; 867 ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); 868 ASSERT(bp_iter != vdp->xdf_i_act); 869 } 870 bp_iter->av_forw = bp->av_forw; 871 if (bp == vdp->xdf_l_act) 872 vdp->xdf_l_act = bp_iter; 873 } 874 bp->av_forw = NULL; 875 } 876 877 static buf_t * 878 xdf_bp_next(xdf_t *vdp) 879 { 880 v_req_t *vreq; 881 buf_t *bp; 882 883 if (vdp->xdf_state == XD_CONNECTED) { 884 /* 885 * If we're in the XD_CONNECTED state, we only service IOs 886 * from the xdf_ready_tq_thread thread. 
887 */ 888 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 889 return (NULL); 890 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 891 return (bp); 892 return (NULL); 893 } 894 895 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 896 if (vdp->xdf_state != XD_READY) 897 return (NULL); 898 899 ASSERT(vdp->xdf_ready_tq_bp == NULL); 900 for (;;) { 901 if ((bp = vdp->xdf_i_act) == NULL) 902 return (NULL); 903 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 904 return (bp); 905 906 /* advance the active buf index pointer */ 907 vdp->xdf_i_act = bp->av_forw; 908 } 909 } 910 911 static void 912 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 913 { 914 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 915 v_req_t *vreq = gs->gs_vreq; 916 buf_t *bp = vreq->v_buf; 917 918 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 919 ASSERT(BP_VREQ(bp) == vreq); 920 921 gs_free(gs); 922 923 if (bioerr != 0) 924 bioerror(bp, bioerr); 925 ASSERT(vreq->v_nslots > 0); 926 if (--vreq->v_nslots > 0) 927 return; 928 929 /* remove this IO from our active queue */ 930 xdf_bp_pop(vdp, bp); 931 932 ASSERT(vreq->v_runq); 933 xdf_kstat_exit(vdp, bp); 934 vreq->v_runq = B_FALSE; 935 vreq_free(vdp, vreq); 936 937 if (IS_ERROR(bp)) { 938 xdf_io_err(bp, geterror(bp), 0); 939 } else if (bp->b_resid != 0) { 940 /* Partial transfers are an error */ 941 xdf_io_err(bp, EIO, bp->b_resid); 942 } else { 943 biodone(bp); 944 } 945 } 946 947 /* 948 * xdf interrupt handler 949 */ 950 static uint_t 951 xdf_intr_locked(xdf_t *vdp) 952 { 953 xendev_ring_t *xbr; 954 blkif_response_t *resp; 955 int bioerr; 956 uint64_t id; 957 uint8_t op; 958 uint16_t status; 959 ddi_acc_handle_t acchdl; 960 961 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 962 963 if ((xbr = vdp->xdf_xb_ring) == NULL) 964 return (DDI_INTR_UNCLAIMED); 965 966 acchdl = vdp->xdf_xb_ring_hdl; 967 968 /* 969 * complete all requests which have a response 970 */ 971 while (resp = xvdi_ring_get_response(xbr)) { 972 id = ddi_get64(acchdl, 
&resp->id); 973 op = ddi_get8(acchdl, &resp->operation); 974 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 975 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 976 op, id, status)); 977 978 if (status != BLKIF_RSP_OKAY) { 979 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 980 vdp->xdf_addr, 981 (op == BLKIF_OP_READ) ? "reading" : "writing")); 982 bioerr = EIO; 983 } else { 984 bioerr = 0; 985 } 986 987 xdf_io_fini(vdp, id, bioerr); 988 } 989 return (DDI_INTR_CLAIMED); 990 } 991 992 /* 993 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 994 * block at a lower pil. 995 */ 996 static uint_t 997 xdf_intr(caddr_t arg) 998 { 999 xdf_t *vdp = (xdf_t *)arg; 1000 int rv; 1001 1002 mutex_enter(&vdp->xdf_dev_lk); 1003 rv = xdf_intr_locked(vdp); 1004 mutex_exit(&vdp->xdf_dev_lk); 1005 1006 if (!do_polled_io) 1007 xdf_io_start(vdp); 1008 1009 return (rv); 1010 } 1011 1012 static void 1013 xdf_ring_push(xdf_t *vdp) 1014 { 1015 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1016 1017 if (vdp->xdf_xb_ring == NULL) 1018 return; 1019 1020 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1021 DPRINTF(IO_DBG, ( 1022 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1023 vdp->xdf_addr)); 1024 } 1025 1026 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1027 xvdi_notify_oe(vdp->xdf_dip); 1028 } 1029 1030 static int 1031 xdf_ring_drain_locked(xdf_t *vdp) 1032 { 1033 int pollc, rv = 0; 1034 1035 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1036 1037 if (xdf_debug & SUSRES_DBG) 1038 xen_printf("xdf_ring_drain: start\n"); 1039 1040 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1041 if (vdp->xdf_xb_ring == NULL) 1042 goto out; 1043 1044 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1045 (void) xdf_intr_locked(vdp); 1046 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1047 goto out; 1048 xdf_ring_push(vdp); 1049 1050 /* file-backed devices can be slow */ 1051 mutex_exit(&vdp->xdf_dev_lk); 1052 #ifdef XPV_HVM_DRIVER 1053 (void) 
HYPERVISOR_yield(); 1054 #endif /* XPV_HVM_DRIVER */ 1055 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1056 mutex_enter(&vdp->xdf_dev_lk); 1057 } 1058 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1059 1060 out: 1061 if (vdp->xdf_xb_ring != NULL) { 1062 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1063 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1064 rv = EIO; 1065 } 1066 if (xdf_debug & SUSRES_DBG) 1067 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1068 vdp->xdf_addr, rv); 1069 return (rv); 1070 } 1071 1072 static int 1073 xdf_ring_drain(xdf_t *vdp) 1074 { 1075 int rv; 1076 mutex_enter(&vdp->xdf_dev_lk); 1077 rv = xdf_ring_drain_locked(vdp); 1078 mutex_exit(&vdp->xdf_dev_lk); 1079 return (rv); 1080 } 1081 1082 /* 1083 * Destroy all v_req_t, grant table entries, and our ring buffer. 1084 */ 1085 static void 1086 xdf_ring_destroy(xdf_t *vdp) 1087 { 1088 v_req_t *vreq; 1089 buf_t *bp; 1090 ge_slot_t *gs; 1091 1092 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1093 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1094 1095 if ((vdp->xdf_state != XD_INIT) && 1096 (vdp->xdf_state != XD_CONNECTED) && 1097 (vdp->xdf_state != XD_READY)) { 1098 ASSERT(vdp->xdf_xb_ring == NULL); 1099 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1100 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1101 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1102 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1103 return; 1104 } 1105 1106 /* 1107 * We don't want to recieve async notifications from the backend 1108 * when it finishes processing ring entries. 1109 */ 1110 #ifdef XPV_HVM_DRIVER 1111 ec_unbind_evtchn(vdp->xdf_evtchn); 1112 #else /* !XPV_HVM_DRIVER */ 1113 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1114 #endif /* !XPV_HVM_DRIVER */ 1115 1116 /* 1117 * Drain any requests in the ring. We need to do this before we 1118 * can free grant table entries, because if active ring entries 1119 * point to grants, then the backend could be trying to access 1120 * those grants. 
1121 */ 1122 (void) xdf_ring_drain_locked(vdp); 1123 1124 /* We're done talking to the backend so free up our event channel */ 1125 xvdi_free_evtchn(vdp->xdf_dip); 1126 vdp->xdf_evtchn = INVALID_EVTCHN; 1127 1128 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1129 bp = vreq->v_buf; 1130 ASSERT(BP_VREQ(bp) == vreq); 1131 1132 /* Free up any grant table entries associaed with this IO */ 1133 while ((gs = list_head(&vreq->v_gs)) != NULL) 1134 gs_free(gs); 1135 1136 /* If this IO was on the runq, move it back to the waitq. */ 1137 if (vreq->v_runq) 1138 xdf_kstat_runq_to_waitq(vdp, bp); 1139 1140 /* 1141 * Reset any buf IO state since we're going to re-issue the 1142 * IO when we reconnect. 1143 */ 1144 vreq_free(vdp, vreq); 1145 BP_VREQ_SET(bp, NULL); 1146 bioerror(bp, 0); 1147 } 1148 1149 /* reset the active queue index pointer */ 1150 vdp->xdf_i_act = vdp->xdf_f_act; 1151 1152 /* Destroy the ring */ 1153 xvdi_free_ring(vdp->xdf_xb_ring); 1154 vdp->xdf_xb_ring = NULL; 1155 vdp->xdf_xb_ring_hdl = NULL; 1156 vdp->xdf_peer = INVALID_DOMID; 1157 } 1158 1159 void 1160 xdfmin(struct buf *bp) 1161 { 1162 if (bp->b_bcount > xdf_maxphys) 1163 bp->b_bcount = xdf_maxphys; 1164 } 1165 1166 /* 1167 * Check if we have a pending "eject" media request. 1168 */ 1169 static int 1170 xdf_eject_pending(xdf_t *vdp) 1171 { 1172 dev_info_t *dip = vdp->xdf_dip; 1173 char *xsname, *str; 1174 1175 if (!vdp->xdf_media_req_supported) 1176 return (B_FALSE); 1177 1178 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1179 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1180 return (B_FALSE); 1181 1182 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1183 strfree(str); 1184 return (B_FALSE); 1185 } 1186 strfree(str); 1187 return (B_TRUE); 1188 } 1189 1190 /* 1191 * Generate a media request. 
1192 */ 1193 static int 1194 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1195 { 1196 dev_info_t *dip = vdp->xdf_dip; 1197 char *xsname; 1198 1199 /* 1200 * we can't be holding xdf_dev_lk because xenbus_printf() can 1201 * block while waiting for a PIL 1 interrupt message. this 1202 * would cause a deadlock with xdf_intr() which needs to grab 1203 * xdf_dev_lk as well and runs at PIL 5. 1204 */ 1205 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1206 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1207 1208 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1209 return (ENXIO); 1210 1211 /* Check if we support media requests */ 1212 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1213 return (ENOTTY); 1214 1215 /* If an eject is pending then don't allow any new requests */ 1216 if (xdf_eject_pending(vdp)) 1217 return (ENXIO); 1218 1219 /* Make sure that there is media present */ 1220 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1221 return (ENXIO); 1222 1223 /* We only allow operations when the device is ready and connected */ 1224 if (vdp->xdf_state != XD_READY) 1225 return (EIO); 1226 1227 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1228 return (EIO); 1229 1230 return (0); 1231 } 1232 1233 /* 1234 * populate a single blkif_request_t w/ a buf 1235 */ 1236 static void 1237 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1238 { 1239 grant_ref_t gr; 1240 uint8_t fsect, lsect; 1241 size_t bcnt; 1242 paddr_t dma_addr; 1243 off_t blk_off; 1244 dev_info_t *dip = vdp->xdf_dip; 1245 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1246 v_req_t *vreq = BP_VREQ(bp); 1247 uint64_t blkno = vreq->v_blkno; 1248 uint_t ndmacs = vreq->v_ndmacs; 1249 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1250 int seg = 0; 1251 int isread = IS_READ(bp); 1252 ge_slot_t *gs = list_head(&vreq->v_gs); 1253 1254 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1255 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1256 1257 if (isread) 1258 ddi_put8(acchdl, 
&rreq->operation, BLKIF_OP_READ); 1259 else { 1260 switch (vreq->v_flush_diskcache) { 1261 case FLUSH_DISKCACHE: 1262 ddi_put8(acchdl, &rreq->operation, 1263 BLKIF_OP_FLUSH_DISKCACHE); 1264 ddi_put16(acchdl, &rreq->handle, vdev); 1265 ddi_put64(acchdl, &rreq->id, 1266 (uint64_t)(uintptr_t)(gs)); 1267 ddi_put8(acchdl, &rreq->nr_segments, 0); 1268 vreq->v_status = VREQ_DMAWIN_DONE; 1269 return; 1270 case WRITE_BARRIER: 1271 ddi_put8(acchdl, &rreq->operation, 1272 BLKIF_OP_WRITE_BARRIER); 1273 break; 1274 default: 1275 if (!vdp->xdf_wce) 1276 ddi_put8(acchdl, &rreq->operation, 1277 BLKIF_OP_WRITE_BARRIER); 1278 else 1279 ddi_put8(acchdl, &rreq->operation, 1280 BLKIF_OP_WRITE); 1281 break; 1282 } 1283 } 1284 1285 ddi_put16(acchdl, &rreq->handle, vdev); 1286 ddi_put64(acchdl, &rreq->sector_number, blkno); 1287 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1288 1289 /* 1290 * loop until all segments are populated or no more dma cookie in buf 1291 */ 1292 for (;;) { 1293 /* 1294 * Each segment of a blkif request can transfer up to 1295 * one 4K page of data. 
1296 */ 1297 bcnt = vreq->v_dmac.dmac_size; 1298 dma_addr = vreq->v_dmac.dmac_laddress; 1299 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1300 fsect = blk_off >> XB_BSHIFT; 1301 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1302 1303 ASSERT(bcnt <= PAGESIZE); 1304 ASSERT((bcnt % XB_BSIZE) == 0); 1305 ASSERT((blk_off & XB_BMASK) == 0); 1306 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1307 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1308 1309 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1310 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1311 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1312 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1313 1314 DPRINTF(IO_DBG, ( 1315 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1316 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1317 DPRINTF(IO_DBG, ( 1318 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1319 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1320 1321 blkno += (bcnt >> XB_BSHIFT); 1322 seg++; 1323 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1324 if (--ndmacs) { 1325 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1326 continue; 1327 } 1328 1329 vreq->v_status = VREQ_DMAWIN_DONE; 1330 vreq->v_blkno = blkno; 1331 break; 1332 } 1333 ddi_put8(acchdl, &rreq->nr_segments, seg); 1334 DPRINTF(IO_DBG, ( 1335 "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n", 1336 vdp->xdf_addr, rreq->id)); 1337 } 1338 1339 static void 1340 xdf_io_start(xdf_t *vdp) 1341 { 1342 struct buf *bp; 1343 v_req_t *vreq; 1344 blkif_request_t *rreq; 1345 boolean_t rreqready = B_FALSE; 1346 1347 mutex_enter(&vdp->xdf_dev_lk); 1348 1349 /* 1350 * Populate the ring request(s). Loop until there is no buf to 1351 * transfer or no free slot available in I/O ring. 
1352 */ 1353 for (;;) { 1354 /* don't start any new IO if we're suspending */ 1355 if (vdp->xdf_suspending) 1356 break; 1357 if ((bp = xdf_bp_next(vdp)) == NULL) 1358 break; 1359 1360 /* if the buf doesn't already have a vreq, allocate one */ 1361 if (((vreq = BP_VREQ(bp)) == NULL) && 1362 ((vreq = vreq_get(vdp, bp)) == NULL)) 1363 break; 1364 1365 /* alloc DMA/GTE resources */ 1366 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1367 break; 1368 1369 /* get next blkif_request in the ring */ 1370 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1371 break; 1372 bzero(rreq, sizeof (blkif_request_t)); 1373 rreqready = B_TRUE; 1374 1375 /* populate blkif_request with this buf */ 1376 xdf_process_rreq(vdp, bp, rreq); 1377 1378 /* 1379 * This buffer/vreq pair is has been allocated a ring buffer 1380 * resources, so if it isn't already in our runq, add it. 1381 */ 1382 if (!vreq->v_runq) 1383 xdf_kstat_waitq_to_runq(vdp, bp); 1384 } 1385 1386 /* Send the request(s) to the backend */ 1387 if (rreqready) 1388 xdf_ring_push(vdp); 1389 1390 mutex_exit(&vdp->xdf_dev_lk); 1391 } 1392 1393 1394 /* check if partition is open, -1 - check all partitions on the disk */ 1395 static boolean_t 1396 xdf_isopen(xdf_t *vdp, int partition) 1397 { 1398 int i; 1399 ulong_t parbit; 1400 boolean_t rval = B_FALSE; 1401 1402 ASSERT((partition == -1) || 1403 ((partition >= 0) || (partition < XDF_PEXT))); 1404 1405 if (partition == -1) 1406 parbit = (ulong_t)-1; 1407 else 1408 parbit = 1 << partition; 1409 1410 for (i = 0; i < OTYPCNT; i++) { 1411 if (vdp->xdf_vd_open[i] & parbit) 1412 rval = B_TRUE; 1413 } 1414 1415 return (rval); 1416 } 1417 1418 /* 1419 * The connection should never be closed as long as someone is holding 1420 * us open, there is pending IO, or someone is waiting waiting for a 1421 * connection. 
1422 */ 1423 static boolean_t 1424 xdf_busy(xdf_t *vdp) 1425 { 1426 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1427 1428 if ((vdp->xdf_xb_ring != NULL) && 1429 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1430 ASSERT(vdp->xdf_state != XD_CLOSED); 1431 return (B_TRUE); 1432 } 1433 1434 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1435 ASSERT(vdp->xdf_state != XD_CLOSED); 1436 return (B_TRUE); 1437 } 1438 1439 if (xdf_isopen(vdp, -1)) { 1440 ASSERT(vdp->xdf_state != XD_CLOSED); 1441 return (B_TRUE); 1442 } 1443 1444 if (vdp->xdf_connect_req > 0) { 1445 ASSERT(vdp->xdf_state != XD_CLOSED); 1446 return (B_TRUE); 1447 } 1448 1449 return (B_FALSE); 1450 } 1451 1452 static void 1453 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1454 { 1455 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1456 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1457 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1458 vdp->xdf_addr, vdp->xdf_state, new_state)); 1459 vdp->xdf_state = new_state; 1460 cv_broadcast(&vdp->xdf_dev_cv); 1461 } 1462 1463 static void 1464 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1465 { 1466 dev_info_t *dip = vdp->xdf_dip; 1467 boolean_t busy; 1468 1469 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1470 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1471 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1472 1473 /* Check if we're already there. */ 1474 if (vdp->xdf_state == new_state) 1475 return; 1476 1477 mutex_enter(&vdp->xdf_dev_lk); 1478 busy = xdf_busy(vdp); 1479 1480 /* If we're already closed then there's nothing todo. */ 1481 if (vdp->xdf_state == XD_CLOSED) { 1482 ASSERT(!busy); 1483 xdf_set_state(vdp, new_state); 1484 mutex_exit(&vdp->xdf_dev_lk); 1485 return; 1486 } 1487 1488 #ifdef DEBUG 1489 /* UhOh. Warn the user that something bad has happened. 
*/ 1490 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1491 (vdp->xdf_xdev_nblocks != 0)) { 1492 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1493 vdp->xdf_addr); 1494 } 1495 #endif /* DEBUG */ 1496 1497 xdf_ring_destroy(vdp); 1498 1499 /* If we're busy then we can only go into the unknown state */ 1500 xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state); 1501 mutex_exit(&vdp->xdf_dev_lk); 1502 1503 /* if we're closed now, let the other end know */ 1504 if (vdp->xdf_state == XD_CLOSED) 1505 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1506 } 1507 1508 1509 /* 1510 * Kick-off connect process 1511 * Status should be XD_UNKNOWN or XD_CLOSED 1512 * On success, status will be changed to XD_INIT 1513 * On error, it will be changed to XD_UNKNOWN 1514 */ 1515 static int 1516 xdf_setstate_init(xdf_t *vdp) 1517 { 1518 dev_info_t *dip = vdp->xdf_dip; 1519 xenbus_transaction_t xbt; 1520 grant_ref_t gref; 1521 char *xsname, *str; 1522 int rv; 1523 1524 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1525 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1526 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1527 (vdp->xdf_state == XD_CLOSED)); 1528 1529 DPRINTF(DDI_DBG, 1530 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1531 1532 /* 1533 * If an eject is pending then don't allow a new connection. 1534 * (Only the backend can clear media request eject request.) 1535 */ 1536 if (xdf_eject_pending(vdp)) 1537 return (DDI_FAILURE); 1538 1539 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1540 goto errout; 1541 1542 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1543 goto errout; 1544 1545 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1546 1547 /* 1548 * Sanity check for the existance of the xenbus device-type property. 1549 * This property might not exist if we our xenbus device nodes was 1550 * force destroyed while we were still connected to the backend. 
1551 */ 1552 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1553 goto errout; 1554 strfree(str); 1555 1556 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1557 goto errout; 1558 1559 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1560 #ifdef XPV_HVM_DRIVER 1561 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1562 #else /* !XPV_HVM_DRIVER */ 1563 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1564 DDI_SUCCESS) { 1565 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1566 "failed to add intr handler", vdp->xdf_addr); 1567 goto errout1; 1568 } 1569 #endif /* !XPV_HVM_DRIVER */ 1570 1571 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1572 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1573 DDI_SUCCESS) { 1574 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1575 vdp->xdf_addr); 1576 goto errout2; 1577 } 1578 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1579 1580 /* 1581 * Write into xenstore the info needed by backend 1582 */ 1583 trans_retry: 1584 if (xenbus_transaction_start(&xbt)) { 1585 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1586 vdp->xdf_addr); 1587 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1588 goto fail_trans; 1589 } 1590 1591 /* 1592 * XBP_PROTOCOL is written by the domain builder in the case of PV 1593 * domains. However, it is not written for HVM domains, so let's 1594 * write it here. 
1595 */ 1596 if (((rv = xenbus_printf(xbt, xsname, 1597 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1598 ((rv = xenbus_printf(xbt, xsname, 1599 XBP_RING_REF, "%u", gref)) != 0) || 1600 ((rv = xenbus_printf(xbt, xsname, 1601 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1602 ((rv = xenbus_printf(xbt, xsname, 1603 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1604 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1605 (void) xenbus_transaction_end(xbt, 1); 1606 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1607 goto fail_trans; 1608 } 1609 1610 /* kick-off connect process */ 1611 if (rv = xenbus_transaction_end(xbt, 0)) { 1612 if (rv == EAGAIN) 1613 goto trans_retry; 1614 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1615 goto fail_trans; 1616 } 1617 1618 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1619 mutex_enter(&vdp->xdf_dev_lk); 1620 xdf_set_state(vdp, XD_INIT); 1621 mutex_exit(&vdp->xdf_dev_lk); 1622 1623 return (DDI_SUCCESS); 1624 1625 fail_trans: 1626 xvdi_free_ring(vdp->xdf_xb_ring); 1627 errout2: 1628 #ifdef XPV_HVM_DRIVER 1629 ec_unbind_evtchn(vdp->xdf_evtchn); 1630 #else /* !XPV_HVM_DRIVER */ 1631 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1632 #endif /* !XPV_HVM_DRIVER */ 1633 errout1: 1634 xvdi_free_evtchn(dip); 1635 vdp->xdf_evtchn = INVALID_EVTCHN; 1636 errout: 1637 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1638 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1639 vdp->xdf_addr); 1640 return (DDI_FAILURE); 1641 } 1642 1643 int 1644 xdf_get_flush_block(xdf_t *vdp) 1645 { 1646 /* 1647 * Get a DEV_BSIZE aligned bufer 1648 */ 1649 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1650 vdp->xdf_cache_flush_block = 1651 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1652 (int)vdp->xdf_xdev_secsize); 1653 1654 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1655 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1656 return (DDI_FAILURE); 1657 
return (DDI_SUCCESS); 1658 } 1659 1660 static void 1661 xdf_setstate_ready(void *arg) 1662 { 1663 xdf_t *vdp = (xdf_t *)arg; 1664 1665 vdp->xdf_ready_tq_thread = curthread; 1666 1667 /* 1668 * We've created all the minor nodes via cmlb_attach() using default 1669 * value in xdf_attach() to make it possible to block in xdf_open(), 1670 * in case there's anyone (say, booting thread) ever trying to open 1671 * it before connected to backend. We will refresh all those minor 1672 * nodes w/ latest info we've got now when we are almost connected. 1673 */ 1674 mutex_enter(&vdp->xdf_dev_lk); 1675 if (vdp->xdf_cmbl_reattach) { 1676 vdp->xdf_cmbl_reattach = B_FALSE; 1677 1678 mutex_exit(&vdp->xdf_dev_lk); 1679 if (xdf_cmlb_attach(vdp) != 0) { 1680 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1681 return; 1682 } 1683 mutex_enter(&vdp->xdf_dev_lk); 1684 } 1685 1686 /* If we're not still trying to get to the ready state, then bail. */ 1687 if (vdp->xdf_state != XD_CONNECTED) { 1688 mutex_exit(&vdp->xdf_dev_lk); 1689 return; 1690 } 1691 mutex_exit(&vdp->xdf_dev_lk); 1692 1693 /* 1694 * If backend has feature-barrier, see if it supports disk 1695 * cache flush op. 1696 */ 1697 vdp->xdf_flush_supported = B_FALSE; 1698 if (vdp->xdf_feature_barrier) { 1699 /* 1700 * Pretend we already know flush is supported so probe 1701 * will attempt the correct op. 1702 */ 1703 vdp->xdf_flush_supported = B_TRUE; 1704 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1705 vdp->xdf_flush_supported = B_TRUE; 1706 } else { 1707 vdp->xdf_flush_supported = B_FALSE; 1708 /* 1709 * If the other end does not support the cache flush op 1710 * then we must use a barrier-write to force disk 1711 * cache flushing. Barrier writes require that a data 1712 * block actually be written. 1713 * Cache a block to barrier-write when we are 1714 * asked to perform a flush. 1715 * XXX - would it be better to just copy 1 block 1716 * (512 bytes) from whatever write we did last 1717 * and rewrite that block? 
1718 */ 1719 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1720 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1721 return; 1722 } 1723 } 1724 } 1725 1726 mutex_enter(&vdp->xdf_cb_lk); 1727 mutex_enter(&vdp->xdf_dev_lk); 1728 if (vdp->xdf_state == XD_CONNECTED) 1729 xdf_set_state(vdp, XD_READY); 1730 mutex_exit(&vdp->xdf_dev_lk); 1731 1732 /* Restart any currently queued up io */ 1733 xdf_io_start(vdp); 1734 1735 mutex_exit(&vdp->xdf_cb_lk); 1736 } 1737 1738 /* 1739 * synthetic geometry 1740 */ 1741 #define XDF_NSECTS 256 1742 #define XDF_NHEADS 16 1743 1744 static void 1745 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1746 { 1747 xdf_t *vdp; 1748 uint_t ncyl; 1749 1750 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1751 1752 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1753 1754 bzero(geomp, sizeof (*geomp)); 1755 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1756 geomp->g_acyl = 0; 1757 geomp->g_nhead = XDF_NHEADS; 1758 geomp->g_nsect = XDF_NSECTS; 1759 geomp->g_secsize = vdp->xdf_xdev_secsize; 1760 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1761 geomp->g_intrlv = 0; 1762 geomp->g_rpm = 7200; 1763 } 1764 1765 /* 1766 * Finish other initialization after we've connected to backend 1767 * Status should be XD_INIT before calling this routine 1768 * On success, status should be changed to XD_CONNECTED. 
1769 * On error, status should stay XD_INIT 1770 */ 1771 static int 1772 xdf_setstate_connected(xdf_t *vdp) 1773 { 1774 dev_info_t *dip = vdp->xdf_dip; 1775 cmlb_geom_t pgeom; 1776 diskaddr_t nblocks = 0; 1777 uint_t secsize = 0; 1778 char *oename, *xsname, *str; 1779 uint_t dinfo; 1780 1781 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1782 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1783 ASSERT(vdp->xdf_state == XD_INIT); 1784 1785 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1786 ((oename = xvdi_get_oename(dip)) == NULL)) 1787 return (DDI_FAILURE); 1788 1789 /* Make sure the other end is XenbusStateConnected */ 1790 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1791 return (DDI_FAILURE); 1792 1793 /* Determine if feature barrier is supported by backend */ 1794 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1795 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1796 vdp->xdf_addr); 1797 1798 /* 1799 * Probe backend. Read the device size into xdf_xdev_nblocks 1800 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1801 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1802 * we always set VDISK_CDROM, regardless of if it's present in 1803 * the xenbus info parameter. 
1804 */ 1805 if (xenbus_gather(XBT_NULL, oename, 1806 XBP_SECTORS, "%"SCNu64, &nblocks, 1807 XBP_SECTOR_SIZE, "%u", &secsize, 1808 XBP_INFO, "%u", &dinfo, 1809 NULL) != 0) { 1810 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1811 "cannot read backend info", vdp->xdf_addr); 1812 return (DDI_FAILURE); 1813 } 1814 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1815 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1816 vdp->xdf_addr); 1817 return (DDI_FAILURE); 1818 } 1819 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1820 dinfo |= VDISK_CDROM; 1821 strfree(str); 1822 1823 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1824 secsize = DEV_BSIZE; 1825 vdp->xdf_xdev_nblocks = nblocks; 1826 vdp->xdf_xdev_secsize = secsize; 1827 #ifdef _ILP32 1828 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1829 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1830 "backend disk device too large with %llu blocks for" 1831 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1832 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1833 return (DDI_FAILURE); 1834 } 1835 #endif 1836 1837 /* 1838 * If the physical geometry for a fixed disk has been explicity 1839 * set then make sure that the specified physical geometry isn't 1840 * larger than the device we connected to. 
1841 */ 1842 if (vdp->xdf_pgeom_fixed && 1843 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1844 cmn_err(CE_WARN, 1845 "xdf@%s: connect failed, fixed geometry too large", 1846 vdp->xdf_addr); 1847 return (DDI_FAILURE); 1848 } 1849 1850 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1851 1852 /* mark vbd is ready for I/O */ 1853 mutex_enter(&vdp->xdf_dev_lk); 1854 xdf_set_state(vdp, XD_CONNECTED); 1855 1856 /* check if the cmlb label should be updated */ 1857 xdf_synthetic_pgeom(dip, &pgeom); 1858 if ((vdp->xdf_dinfo != dinfo) || 1859 (!vdp->xdf_pgeom_fixed && 1860 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1861 vdp->xdf_cmbl_reattach = B_TRUE; 1862 1863 vdp->xdf_dinfo = dinfo; 1864 if (!vdp->xdf_pgeom_fixed) 1865 vdp->xdf_pgeom = pgeom; 1866 } 1867 1868 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1869 if (vdp->xdf_xdev_nblocks == 0) { 1870 vdp->xdf_mstate = DKIO_EJECTED; 1871 cv_broadcast(&vdp->xdf_mstate_cv); 1872 } else { 1873 vdp->xdf_mstate = DKIO_INSERTED; 1874 cv_broadcast(&vdp->xdf_mstate_cv); 1875 } 1876 } else { 1877 if (vdp->xdf_mstate != DKIO_NONE) { 1878 vdp->xdf_mstate = DKIO_NONE; 1879 cv_broadcast(&vdp->xdf_mstate_cv); 1880 } 1881 } 1882 1883 mutex_exit(&vdp->xdf_dev_lk); 1884 1885 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1886 (uint64_t)vdp->xdf_xdev_nblocks); 1887 1888 /* Restart any currently queued up io */ 1889 xdf_io_start(vdp); 1890 1891 /* 1892 * To get to the ready state we have to do IO to the backend device, 1893 * but we can't initiate IO from the other end change callback thread 1894 * (which is the current context we're executing in.) This is because 1895 * if the other end disconnects while we're doing IO from the callback 1896 * thread, then we can't recieve that disconnect event and we hang 1897 * waiting for an IO that can never complete. 
1898 */ 1899 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1900 DDI_SLEEP); 1901 1902 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1903 return (DDI_SUCCESS); 1904 } 1905 1906 /*ARGSUSED*/ 1907 static void 1908 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1909 { 1910 XenbusState new_state = *(XenbusState *)impl_data; 1911 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1912 1913 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1914 vdp->xdf_addr, new_state)); 1915 1916 mutex_enter(&vdp->xdf_cb_lk); 1917 1918 /* We assume that this callback is single threaded */ 1919 ASSERT(vdp->xdf_oe_change_thread == NULL); 1920 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1921 1922 /* ignore any backend state changes if we're suspending/suspended */ 1923 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1924 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1925 mutex_exit(&vdp->xdf_cb_lk); 1926 return; 1927 } 1928 1929 switch (new_state) { 1930 case XenbusStateUnknown: 1931 case XenbusStateInitialising: 1932 case XenbusStateInitWait: 1933 case XenbusStateInitialised: 1934 if (vdp->xdf_state == XD_INIT) 1935 break; 1936 1937 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1938 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1939 break; 1940 ASSERT(vdp->xdf_state == XD_INIT); 1941 break; 1942 1943 case XenbusStateConnected: 1944 if ((vdp->xdf_state == XD_CONNECTED) || 1945 (vdp->xdf_state == XD_READY)) 1946 break; 1947 1948 if (vdp->xdf_state != XD_INIT) { 1949 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1950 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1951 break; 1952 ASSERT(vdp->xdf_state == XD_INIT); 1953 } 1954 1955 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1956 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1957 break; 1958 } 1959 ASSERT(vdp->xdf_state == XD_CONNECTED); 1960 break; 1961 1962 case XenbusStateClosing: 1963 if (xdf_isopen(vdp, -1)) { 1964 cmn_err(CE_NOTE, 1965 "xdf@%s: 
hot-unplug failed, still in use", 1966 vdp->xdf_addr); 1967 break; 1968 } 1969 /*FALLTHROUGH*/ 1970 case XenbusStateClosed: 1971 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1972 break; 1973 } 1974 1975 /* notify anybody waiting for oe state change */ 1976 cv_broadcast(&vdp->xdf_dev_cv); 1977 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1978 mutex_exit(&vdp->xdf_cb_lk); 1979 } 1980 1981 static int 1982 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1983 { 1984 int rv, timeouts = 0, reset = 20; 1985 1986 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1987 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1988 1989 /* we can't connect once we're in the closed state */ 1990 if (vdp->xdf_state == XD_CLOSED) 1991 return (XD_CLOSED); 1992 1993 vdp->xdf_connect_req++; 1994 while (vdp->xdf_state != XD_READY) { 1995 mutex_exit(&vdp->xdf_dev_lk); 1996 1997 /* only one thread at a time can be the connection thread */ 1998 if (vdp->xdf_connect_thread == NULL) 1999 vdp->xdf_connect_thread = curthread; 2000 2001 if (vdp->xdf_connect_thread == curthread) { 2002 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 2003 /* 2004 * If we haven't establised a connection 2005 * within the reset time, then disconnect 2006 * so we can try again, and double the reset 2007 * time. The reset time starts at 2 sec. 
2008 */ 2009 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2010 reset *= 2; 2011 } 2012 if (vdp->xdf_state == XD_UNKNOWN) 2013 (void) xdf_setstate_init(vdp); 2014 if (vdp->xdf_state == XD_INIT) 2015 (void) xdf_setstate_connected(vdp); 2016 } 2017 2018 mutex_enter(&vdp->xdf_dev_lk); 2019 if (!wait || (vdp->xdf_state == XD_READY)) 2020 goto out; 2021 2022 mutex_exit((&vdp->xdf_cb_lk)); 2023 if (vdp->xdf_connect_thread != curthread) { 2024 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2025 } else { 2026 /* delay for 0.1 sec */ 2027 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv, 2028 &vdp->xdf_dev_lk, drv_usectohz(100*1000), 2029 TR_CLOCK_TICK); 2030 if (rv == -1) 2031 timeouts++; 2032 } 2033 mutex_exit((&vdp->xdf_dev_lk)); 2034 mutex_enter((&vdp->xdf_cb_lk)); 2035 mutex_enter((&vdp->xdf_dev_lk)); 2036 if (rv == 0) 2037 goto out; 2038 } 2039 2040 out: 2041 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2042 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2043 2044 if (vdp->xdf_connect_thread == curthread) { 2045 /* 2046 * wake up someone else so they can become the connection 2047 * thread. 
2048 */ 2049 cv_signal(&vdp->xdf_dev_cv); 2050 vdp->xdf_connect_thread = NULL; 2051 } 2052 2053 /* Try to lock the media */ 2054 mutex_exit((&vdp->xdf_dev_lk)); 2055 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2056 mutex_enter((&vdp->xdf_dev_lk)); 2057 2058 vdp->xdf_connect_req--; 2059 return (vdp->xdf_state); 2060 } 2061 2062 static uint_t 2063 xdf_iorestart(caddr_t arg) 2064 { 2065 xdf_t *vdp = (xdf_t *)arg; 2066 2067 ASSERT(vdp != NULL); 2068 2069 mutex_enter(&vdp->xdf_dev_lk); 2070 ASSERT(ISDMACBON(vdp)); 2071 SETDMACBOFF(vdp); 2072 mutex_exit(&vdp->xdf_dev_lk); 2073 2074 xdf_io_start(vdp); 2075 2076 return (DDI_INTR_CLAIMED); 2077 } 2078 2079 #if defined(XPV_HVM_DRIVER) 2080 2081 typedef struct xdf_hvm_entry { 2082 list_node_t xdf_he_list; 2083 char *xdf_he_path; 2084 dev_info_t *xdf_he_dip; 2085 } xdf_hvm_entry_t; 2086 2087 static list_t xdf_hvm_list; 2088 static kmutex_t xdf_hvm_list_lock; 2089 2090 static xdf_hvm_entry_t * 2091 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2092 { 2093 xdf_hvm_entry_t *i; 2094 2095 ASSERT((path != NULL) || (dip != NULL)); 2096 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2097 2098 i = list_head(&xdf_hvm_list); 2099 while (i != NULL) { 2100 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2101 i = list_next(&xdf_hvm_list, i); 2102 continue; 2103 } 2104 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2105 i = list_next(&xdf_hvm_list, i); 2106 continue; 2107 } 2108 break; 2109 } 2110 return (i); 2111 } 2112 2113 dev_info_t * 2114 xdf_hvm_hold(const char *path) 2115 { 2116 xdf_hvm_entry_t *i; 2117 dev_info_t *dip; 2118 2119 mutex_enter(&xdf_hvm_list_lock); 2120 i = i_xdf_hvm_find(path, NULL); 2121 if (i == NULL) { 2122 mutex_exit(&xdf_hvm_list_lock); 2123 return (B_FALSE); 2124 } 2125 ndi_hold_devi(dip = i->xdf_he_dip); 2126 mutex_exit(&xdf_hvm_list_lock); 2127 return (dip); 2128 } 2129 2130 static void 2131 xdf_hvm_add(dev_info_t *dip) 2132 { 2133 xdf_hvm_entry_t *i; 2134 char *path; 2135 2136 /* figure out 
the path for the dip */ 2137 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2138 (void) ddi_pathname(dip, path); 2139 2140 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2141 i->xdf_he_dip = dip; 2142 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2143 2144 mutex_enter(&xdf_hvm_list_lock); 2145 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2146 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2147 list_insert_head(&xdf_hvm_list, i); 2148 mutex_exit(&xdf_hvm_list_lock); 2149 2150 kmem_free(path, MAXPATHLEN); 2151 } 2152 2153 static void 2154 xdf_hvm_rm(dev_info_t *dip) 2155 { 2156 xdf_hvm_entry_t *i; 2157 2158 mutex_enter(&xdf_hvm_list_lock); 2159 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2160 list_remove(&xdf_hvm_list, i); 2161 mutex_exit(&xdf_hvm_list_lock); 2162 2163 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2164 kmem_free(i, sizeof (*i)); 2165 } 2166 2167 static void 2168 xdf_hvm_init(void) 2169 { 2170 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2171 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2172 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2173 } 2174 2175 static void 2176 xdf_hvm_fini(void) 2177 { 2178 ASSERT(list_head(&xdf_hvm_list) == NULL); 2179 list_destroy(&xdf_hvm_list); 2180 mutex_destroy(&xdf_hvm_list_lock); 2181 } 2182 2183 boolean_t 2184 xdf_hvm_connect(dev_info_t *dip) 2185 { 2186 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2187 char *oename, *str; 2188 int rv; 2189 2190 mutex_enter(&vdp->xdf_cb_lk); 2191 2192 /* 2193 * Before try to establish a connection we need to wait for the 2194 * backend hotplug scripts to have run. Once they are run the 2195 * "<oename>/hotplug-status" property will be set to "connected". 2196 */ 2197 for (;;) { 2198 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2199 2200 /* 2201 * Get the xenbus path to the backend device. Note that 2202 * we can't cache this path (and we look it up on each pass 2203 * through this loop) because it could change during 2204 * suspend, resume, and migration operations. 
2205 */ 2206 if ((oename = xvdi_get_oename(dip)) == NULL) { 2207 mutex_exit(&vdp->xdf_cb_lk); 2208 return (B_FALSE); 2209 } 2210 2211 str = NULL; 2212 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2213 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2214 break; 2215 2216 if (str != NULL) 2217 strfree(str); 2218 2219 /* wait for an update to "<oename>/hotplug-status" */ 2220 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2221 /* we got interrupted by a signal */ 2222 mutex_exit(&vdp->xdf_cb_lk); 2223 return (B_FALSE); 2224 } 2225 } 2226 2227 /* Good news. The backend hotplug scripts have been run. */ 2228 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2229 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2230 strfree(str); 2231 2232 /* 2233 * If we're emulating a cd device and if the backend doesn't support 2234 * media request opreations, then we're not going to bother trying 2235 * to establish a connection for a couple reasons. First off, media 2236 * requests support is required to support operations like eject and 2237 * media locking. Second, other backend platforms like Linux don't 2238 * support hvm pv cdrom access. They don't even have a backend pv 2239 * driver for cdrom device nodes, so we don't want to block forever 2240 * waiting for a connection to a backend driver that doesn't exist. 2241 */ 2242 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2243 mutex_exit(&vdp->xdf_cb_lk); 2244 return (B_FALSE); 2245 } 2246 2247 mutex_enter(&vdp->xdf_dev_lk); 2248 rv = xdf_connect_locked(vdp, B_TRUE); 2249 mutex_exit(&vdp->xdf_dev_lk); 2250 mutex_exit(&vdp->xdf_cb_lk); 2251 2252 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2253 } 2254 2255 int 2256 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2257 { 2258 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2259 2260 /* sanity check the requested physical geometry */ 2261 mutex_enter(&vdp->xdf_dev_lk); 2262 if ((geomp->g_secsize != XB_BSIZE) || 2263 (geomp->g_capacity == 0)) { 2264 mutex_exit(&vdp->xdf_dev_lk); 2265 return (EINVAL); 2266 } 2267 2268 /* 2269 * If we've already connected to the backend device then make sure 2270 * we're not defining a physical geometry larger than our backend 2271 * device. 2272 */ 2273 if ((vdp->xdf_xdev_nblocks != 0) && 2274 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2275 mutex_exit(&vdp->xdf_dev_lk); 2276 return (EINVAL); 2277 } 2278 2279 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2280 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2281 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2282 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2283 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2284 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2285 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2286 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2287 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2288 2289 vdp->xdf_pgeom_fixed = B_TRUE; 2290 mutex_exit(&vdp->xdf_dev_lk); 2291 2292 /* force a re-validation */ 2293 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2294 2295 return (0); 2296 } 2297 2298 boolean_t 2299 xdf_is_cd(dev_info_t *dip) 2300 { 2301 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2302 boolean_t rv; 2303 2304 mutex_enter(&vdp->xdf_cb_lk); 2305 rv = XD_IS_CD(vdp); 2306 mutex_exit(&vdp->xdf_cb_lk); 2307 return (rv); 2308 } 2309 2310 boolean_t 2311 xdf_is_rm(dev_info_t *dip) 2312 { 2313 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2314 boolean_t rv; 2315 2316 mutex_enter(&vdp->xdf_cb_lk); 2317 rv = XD_IS_RM(vdp); 2318 mutex_exit(&vdp->xdf_cb_lk); 2319 return (rv); 2320 } 2321 2322 boolean_t 2323 xdf_media_req_supported(dev_info_t *dip) 2324 { 2325 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 
2326 boolean_t rv; 2327 2328 mutex_enter(&vdp->xdf_cb_lk); 2329 rv = vdp->xdf_media_req_supported; 2330 mutex_exit(&vdp->xdf_cb_lk); 2331 return (rv); 2332 } 2333 2334 #endif /* XPV_HVM_DRIVER */ 2335 2336 static int 2337 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2338 { 2339 xdf_t *vdp; 2340 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2341 2342 if (vdp == NULL) 2343 return (ENXIO); 2344 2345 mutex_enter(&vdp->xdf_dev_lk); 2346 *capp = vdp->xdf_pgeom.g_capacity; 2347 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2348 mutex_exit(&vdp->xdf_dev_lk); 2349 return (0); 2350 } 2351 2352 static int 2353 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2354 { 2355 xdf_t *vdp; 2356 2357 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2358 return (ENXIO); 2359 *geomp = vdp->xdf_pgeom; 2360 return (0); 2361 } 2362 2363 /* 2364 * No real HBA, no geometry available from it 2365 */ 2366 /*ARGSUSED*/ 2367 static int 2368 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2369 { 2370 return (EINVAL); 2371 } 2372 2373 static int 2374 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2375 { 2376 xdf_t *vdp; 2377 2378 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2379 return (ENXIO); 2380 2381 if (XD_IS_RO(vdp)) 2382 tgattributep->media_is_writable = 0; 2383 else 2384 tgattributep->media_is_writable = 1; 2385 tgattributep->media_is_rotational = 0; 2386 return (0); 2387 } 2388 2389 /* ARGSUSED3 */ 2390 int 2391 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2392 { 2393 int instance; 2394 xdf_t *vdp; 2395 2396 instance = ddi_get_instance(dip); 2397 2398 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 2399 return (ENXIO); 2400 2401 switch (cmd) { 2402 case TG_GETPHYGEOM: 2403 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2404 case TG_GETVIRTGEOM: 2405 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2406 case TG_GETCAPACITY: 2407 return 
(xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2408 case TG_GETBLOCKSIZE: 2409 mutex_enter(&vdp->xdf_cb_lk); 2410 *(uint32_t *)arg = vdp->xdf_xdev_secsize; 2411 mutex_exit(&vdp->xdf_cb_lk); 2412 return (0); 2413 case TG_GETATTR: 2414 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2415 default: 2416 return (ENOTTY); 2417 } 2418 } 2419 2420 /* ARGSUSED5 */ 2421 int 2422 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2423 diskaddr_t start, size_t reqlen, void *tg_cookie) 2424 { 2425 xdf_t *vdp; 2426 struct buf *bp; 2427 int err = 0; 2428 2429 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2430 2431 /* We don't allow IO from the oe_change callback thread */ 2432 ASSERT(curthread != vdp->xdf_oe_change_thread); 2433 2434 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) 2435 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2436 return (EINVAL); 2437 2438 bp = getrbuf(KM_SLEEP); 2439 if (cmd == TG_READ) 2440 bp->b_flags = B_BUSY | B_READ; 2441 else 2442 bp->b_flags = B_BUSY | B_WRITE; 2443 2444 bp->b_un.b_addr = bufp; 2445 bp->b_bcount = reqlen; 2446 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); 2447 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2448 2449 mutex_enter(&vdp->xdf_dev_lk); 2450 xdf_bp_push(vdp, bp); 2451 mutex_exit(&vdp->xdf_dev_lk); 2452 xdf_io_start(vdp); 2453 if (curthread == vdp->xdf_ready_tq_thread) 2454 (void) xdf_ring_drain(vdp); 2455 err = biowait(bp); 2456 ASSERT(bp->b_flags & B_DONE); 2457 freerbuf(bp); 2458 return (err); 2459 } 2460 2461 /* 2462 * Lock the current media. Set the media state to "lock". 2463 * (Media locks are only respected by the backend driver.) 2464 */ 2465 static int 2466 xdf_ioctl_mlock(xdf_t *vdp) 2467 { 2468 int rv; 2469 mutex_enter(&vdp->xdf_cb_lk); 2470 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2471 mutex_exit(&vdp->xdf_cb_lk); 2472 return (rv); 2473 } 2474 2475 /* 2476 * Release a media lock. Set the media state to "none". 
2477 */ 2478 static int 2479 xdf_ioctl_munlock(xdf_t *vdp) 2480 { 2481 int rv; 2482 mutex_enter(&vdp->xdf_cb_lk); 2483 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2484 mutex_exit(&vdp->xdf_cb_lk); 2485 return (rv); 2486 } 2487 2488 /* 2489 * Eject the current media. Ignores any media locks. (Media locks 2490 * are only for benifit of the the backend.) 2491 */ 2492 static int 2493 xdf_ioctl_eject(xdf_t *vdp) 2494 { 2495 int rv; 2496 2497 mutex_enter(&vdp->xdf_cb_lk); 2498 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2499 mutex_exit(&vdp->xdf_cb_lk); 2500 return (rv); 2501 } 2502 2503 /* 2504 * We've set the media requests xenbus parameter to eject, so now 2505 * disconnect from the backend, wait for the backend to clear 2506 * the media requets xenbus paramter, and then we can reconnect 2507 * to the backend. 2508 */ 2509 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2510 mutex_enter(&vdp->xdf_dev_lk); 2511 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2512 mutex_exit(&vdp->xdf_dev_lk); 2513 mutex_exit(&vdp->xdf_cb_lk); 2514 return (EIO); 2515 } 2516 mutex_exit(&vdp->xdf_dev_lk); 2517 mutex_exit(&vdp->xdf_cb_lk); 2518 return (0); 2519 } 2520 2521 /* 2522 * Watch for media state changes. This can be an insertion of a device 2523 * (triggered by a 'xm block-configure' request in another domain) or 2524 * the ejection of a device (triggered by a local "eject" operation). 2525 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 
2526 */ 2527 static int 2528 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2529 { 2530 enum dkio_state prev_state; 2531 2532 mutex_enter(&vdp->xdf_cb_lk); 2533 prev_state = vdp->xdf_mstate; 2534 2535 if (vdp->xdf_mstate == mstate) { 2536 while (vdp->xdf_mstate == prev_state) { 2537 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2538 &vdp->xdf_cb_lk) == 0) { 2539 mutex_exit(&vdp->xdf_cb_lk); 2540 return (EINTR); 2541 } 2542 } 2543 } 2544 2545 if ((prev_state != DKIO_INSERTED) && 2546 (vdp->xdf_mstate == DKIO_INSERTED)) { 2547 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2548 mutex_exit(&vdp->xdf_cb_lk); 2549 return (0); 2550 } 2551 2552 mutex_exit(&vdp->xdf_cb_lk); 2553 return (0); 2554 } 2555 2556 /*ARGSUSED*/ 2557 static int 2558 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2559 int *rvalp) 2560 { 2561 minor_t minor = getminor(dev); 2562 int part = XDF_PART(minor); 2563 xdf_t *vdp; 2564 int rv; 2565 2566 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2567 (!xdf_isopen(vdp, part))) 2568 return (ENXIO); 2569 2570 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2571 vdp->xdf_addr, cmd, cmd)); 2572 2573 switch (cmd) { 2574 default: 2575 return (ENOTTY); 2576 case DKIOCG_PHYGEOM: 2577 case DKIOCG_VIRTGEOM: 2578 case DKIOCGGEOM: 2579 case DKIOCSGEOM: 2580 case DKIOCGAPART: 2581 case DKIOCSAPART: 2582 case DKIOCGVTOC: 2583 case DKIOCSVTOC: 2584 case DKIOCPARTINFO: 2585 case DKIOCGEXTVTOC: 2586 case DKIOCSEXTVTOC: 2587 case DKIOCEXTPARTINFO: 2588 case DKIOCGMBOOT: 2589 case DKIOCSMBOOT: 2590 case DKIOCGETEFI: 2591 case DKIOCSETEFI: 2592 case DKIOCSETEXTPART: 2593 case DKIOCPARTITION: 2594 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2595 rvalp, NULL)); 2596 case FDEJECT: 2597 case DKIOCEJECT: 2598 case CDROMEJECT: 2599 return (xdf_ioctl_eject(vdp)); 2600 case DKIOCLOCK: 2601 return (xdf_ioctl_mlock(vdp)); 2602 case DKIOCUNLOCK: 2603 return (xdf_ioctl_munlock(vdp)); 2604 case CDROMREADOFFSET: { 2605 
int offset = 0; 2606 if (!XD_IS_CD(vdp)) 2607 return (ENOTTY); 2608 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2609 return (EFAULT); 2610 return (0); 2611 } 2612 case DKIOCGMEDIAINFO: { 2613 struct dk_minfo media_info; 2614 2615 media_info.dki_lbsize = vdp->xdf_xdev_secsize; 2616 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2617 if (XD_IS_CD(vdp)) 2618 media_info.dki_media_type = DK_CDROM; 2619 else 2620 media_info.dki_media_type = DK_FIXED_DISK; 2621 2622 if (ddi_copyout(&media_info, (void *)arg, 2623 sizeof (struct dk_minfo), mode)) 2624 return (EFAULT); 2625 return (0); 2626 } 2627 case DKIOCINFO: { 2628 struct dk_cinfo info; 2629 2630 /* controller information */ 2631 if (XD_IS_CD(vdp)) 2632 info.dki_ctype = DKC_CDROM; 2633 else 2634 info.dki_ctype = DKC_VBD; 2635 2636 info.dki_cnum = 0; 2637 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2638 2639 /* unit information */ 2640 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2641 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2642 info.dki_flags = DKI_FMTVOL; 2643 info.dki_partition = part; 2644 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2645 info.dki_addr = 0; 2646 info.dki_space = 0; 2647 info.dki_prio = 0; 2648 info.dki_vec = 0; 2649 2650 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2651 return (EFAULT); 2652 return (0); 2653 } 2654 case DKIOCSTATE: { 2655 enum dkio_state mstate; 2656 2657 if (ddi_copyin((void *)arg, &mstate, 2658 sizeof (mstate), mode) != 0) 2659 return (EFAULT); 2660 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2661 return (rv); 2662 mstate = vdp->xdf_mstate; 2663 if (ddi_copyout(&mstate, (void *)arg, 2664 sizeof (mstate), mode) != 0) 2665 return (EFAULT); 2666 return (0); 2667 } 2668 case DKIOCREMOVABLE: { 2669 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2670 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2671 return (EFAULT); 2672 return (0); 2673 } 2674 case DKIOCGETWCE: { 2675 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2676 if (ddi_copyout(&i, 
(void *)arg, sizeof (i), mode)) 2677 return (EFAULT); 2678 return (0); 2679 } 2680 case DKIOCSETWCE: { 2681 int i; 2682 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2683 return (EFAULT); 2684 vdp->xdf_wce = VOID2BOOLEAN(i); 2685 return (0); 2686 } 2687 case DKIOCFLUSHWRITECACHE: { 2688 struct dk_callback *dkc = (struct dk_callback *)arg; 2689 2690 if (vdp->xdf_flush_supported) { 2691 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2692 NULL, 0, 0, (void *)dev); 2693 } else if (vdp->xdf_feature_barrier && 2694 !xdf_barrier_flush_disable) { 2695 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2696 vdp->xdf_cache_flush_block, xdf_flush_block, 2697 vdp->xdf_xdev_secsize, (void *)dev); 2698 } else { 2699 return (ENOTTY); 2700 } 2701 if ((mode & FKIOCTL) && (dkc != NULL) && 2702 (dkc->dkc_callback != NULL)) { 2703 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2704 /* need to return 0 after calling callback */ 2705 rv = 0; 2706 } 2707 return (rv); 2708 } 2709 } 2710 /*NOTREACHED*/ 2711 } 2712 2713 static int 2714 xdf_strategy(struct buf *bp) 2715 { 2716 xdf_t *vdp; 2717 minor_t minor; 2718 diskaddr_t p_blkct, p_blkst; 2719 daddr_t blkno; 2720 ulong_t nblks; 2721 int part; 2722 2723 minor = getminor(bp->b_edev); 2724 part = XDF_PART(minor); 2725 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2726 2727 mutex_enter(&vdp->xdf_dev_lk); 2728 if (!xdf_isopen(vdp, part)) { 2729 mutex_exit(&vdp->xdf_dev_lk); 2730 xdf_io_err(bp, ENXIO, 0); 2731 return (0); 2732 } 2733 2734 /* We don't allow IO from the oe_change callback thread */ 2735 ASSERT(curthread != vdp->xdf_oe_change_thread); 2736 2737 /* Check for writes to a read only device */ 2738 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2739 mutex_exit(&vdp->xdf_dev_lk); 2740 xdf_io_err(bp, EROFS, 0); 2741 return (0); 2742 } 2743 2744 /* Check if this I/O is accessing a partition or the entire disk */ 2745 if ((long)bp->b_private == XB_SLICE_NONE) { 2746 /* This I/O is using an absolute offset */ 2747 p_blkct = vdp->xdf_xdev_nblocks; 2748 p_blkst 
= 0; 2749 } else { 2750 /* This I/O is using a partition relative offset */ 2751 mutex_exit(&vdp->xdf_dev_lk); 2752 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2753 &p_blkst, NULL, NULL, NULL)) { 2754 xdf_io_err(bp, ENXIO, 0); 2755 return (0); 2756 } 2757 mutex_enter(&vdp->xdf_dev_lk); 2758 } 2759 2760 /* 2761 * Adjust the real blkno and bcount according to the underline 2762 * physical sector size. 2763 */ 2764 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); 2765 2766 /* check for a starting block beyond the disk or partition limit */ 2767 if (blkno > p_blkct) { 2768 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2769 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); 2770 mutex_exit(&vdp->xdf_dev_lk); 2771 xdf_io_err(bp, EINVAL, 0); 2772 return (0); 2773 } 2774 2775 /* Legacy: don't set error flag at this case */ 2776 if (blkno == p_blkct) { 2777 mutex_exit(&vdp->xdf_dev_lk); 2778 bp->b_resid = bp->b_bcount; 2779 biodone(bp); 2780 return (0); 2781 } 2782 2783 /* sanitize the input buf */ 2784 bioerror(bp, 0); 2785 bp->b_resid = 0; 2786 bp->av_back = bp->av_forw = NULL; 2787 2788 /* Adjust for partial transfer, this will result in an error later */ 2789 if (vdp->xdf_xdev_secsize != 0 && 2790 vdp->xdf_xdev_secsize != XB_BSIZE) { 2791 nblks = bp->b_bcount / vdp->xdf_xdev_secsize; 2792 } else { 2793 nblks = bp->b_bcount >> XB_BSHIFT; 2794 } 2795 2796 if ((blkno + nblks) > p_blkct) { 2797 if (vdp->xdf_xdev_secsize != 0 && 2798 vdp->xdf_xdev_secsize != XB_BSIZE) { 2799 bp->b_resid = 2800 ((blkno + nblks) - p_blkct) * 2801 vdp->xdf_xdev_secsize; 2802 } else { 2803 bp->b_resid = 2804 ((blkno + nblks) - p_blkct) << 2805 XB_BSHIFT; 2806 } 2807 bp->b_bcount -= bp->b_resid; 2808 } 2809 2810 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2811 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); 2812 2813 /* Fix up the buf struct */ 2814 bp->b_flags |= B_BUSY; 2815 bp->b_private = (void *)(uintptr_t)p_blkst; 2816 2817 
xdf_bp_push(vdp, bp); 2818 mutex_exit(&vdp->xdf_dev_lk); 2819 xdf_io_start(vdp); 2820 if (do_polled_io) 2821 (void) xdf_ring_drain(vdp); 2822 return (0); 2823 } 2824 2825 /*ARGSUSED*/ 2826 static int 2827 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2828 { 2829 xdf_t *vdp; 2830 minor_t minor; 2831 diskaddr_t p_blkcnt; 2832 int part; 2833 2834 minor = getminor(dev); 2835 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2836 return (ENXIO); 2837 2838 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2839 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2840 2841 part = XDF_PART(minor); 2842 if (!xdf_isopen(vdp, part)) 2843 return (ENXIO); 2844 2845 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2846 NULL, NULL, NULL, NULL)) 2847 return (ENXIO); 2848 2849 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2850 return (ENOSPC); 2851 2852 if (U_INVAL(uiop)) 2853 return (EINVAL); 2854 2855 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2856 } 2857 2858 /*ARGSUSED*/ 2859 static int 2860 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2861 { 2862 xdf_t *vdp; 2863 minor_t minor; 2864 diskaddr_t p_blkcnt; 2865 int part; 2866 2867 minor = getminor(dev); 2868 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2869 return (ENXIO); 2870 2871 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2872 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2873 2874 part = XDF_PART(minor); 2875 if (!xdf_isopen(vdp, part)) 2876 return (ENXIO); 2877 2878 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2879 NULL, NULL, NULL, NULL)) 2880 return (ENXIO); 2881 2882 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2883 return (ENOSPC); 2884 2885 if (U_INVAL(uiop)) 2886 return (EINVAL); 2887 2888 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2889 } 2890 2891 /*ARGSUSED*/ 2892 static int 2893 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2894 { 2895 xdf_t *vdp; 2896 minor_t minor; 2897 struct uio *uiop 
= aiop->aio_uio; 2898 diskaddr_t p_blkcnt; 2899 int part; 2900 2901 minor = getminor(dev); 2902 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2903 return (ENXIO); 2904 2905 part = XDF_PART(minor); 2906 if (!xdf_isopen(vdp, part)) 2907 return (ENXIO); 2908 2909 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2910 NULL, NULL, NULL, NULL)) 2911 return (ENXIO); 2912 2913 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2914 return (ENOSPC); 2915 2916 if (U_INVAL(uiop)) 2917 return (EINVAL); 2918 2919 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2920 } 2921 2922 /*ARGSUSED*/ 2923 static int 2924 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2925 { 2926 xdf_t *vdp; 2927 minor_t minor; 2928 struct uio *uiop = aiop->aio_uio; 2929 diskaddr_t p_blkcnt; 2930 int part; 2931 2932 minor = getminor(dev); 2933 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2934 return (ENXIO); 2935 2936 part = XDF_PART(minor); 2937 if (!xdf_isopen(vdp, part)) 2938 return (ENXIO); 2939 2940 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2941 NULL, NULL, NULL, NULL)) 2942 return (ENXIO); 2943 2944 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2945 return (ENOSPC); 2946 2947 if (U_INVAL(uiop)) 2948 return (EINVAL); 2949 2950 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2951 } 2952 2953 static int 2954 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2955 { 2956 struct buf dumpbuf, *dbp = &dumpbuf; 2957 xdf_t *vdp; 2958 minor_t minor; 2959 int err = 0; 2960 int part; 2961 diskaddr_t p_blkcnt, p_blkst; 2962 2963 minor = getminor(dev); 2964 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2965 return (ENXIO); 2966 2967 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2968 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2969 2970 /* We don't allow IO from the oe_change callback thread */ 2971 ASSERT(curthread != vdp->xdf_oe_change_thread); 2972 2973 
part = XDF_PART(minor); 2974 if (!xdf_isopen(vdp, part)) 2975 return (ENXIO); 2976 2977 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 2978 NULL, NULL, NULL)) 2979 return (ENXIO); 2980 2981 if ((blkno + nblk) > 2982 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 2983 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 2984 vdp->xdf_addr, (daddr_t)((blkno + nblk) / 2985 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 2986 return (EINVAL); 2987 } 2988 2989 bioinit(dbp); 2990 dbp->b_flags = B_BUSY; 2991 dbp->b_un.b_addr = addr; 2992 dbp->b_bcount = nblk << DEV_BSHIFT; 2993 dbp->b_blkno = blkno; 2994 dbp->b_edev = dev; 2995 dbp->b_private = (void *)(uintptr_t)p_blkst; 2996 2997 mutex_enter(&vdp->xdf_dev_lk); 2998 xdf_bp_push(vdp, dbp); 2999 mutex_exit(&vdp->xdf_dev_lk); 3000 xdf_io_start(vdp); 3001 err = xdf_ring_drain(vdp); 3002 biofini(dbp); 3003 return (err); 3004 } 3005 3006 /*ARGSUSED*/ 3007 static int 3008 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 3009 { 3010 minor_t minor; 3011 xdf_t *vdp; 3012 int part; 3013 ulong_t parbit; 3014 3015 minor = getminor(dev); 3016 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3017 return (ENXIO); 3018 3019 mutex_enter(&vdp->xdf_dev_lk); 3020 part = XDF_PART(minor); 3021 if (!xdf_isopen(vdp, part)) { 3022 mutex_exit(&vdp->xdf_dev_lk); 3023 return (ENXIO); 3024 } 3025 parbit = 1 << part; 3026 3027 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3028 if (otyp == OTYP_LYR) { 3029 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3030 if (--vdp->xdf_vd_lyropen[part] == 0) 3031 vdp->xdf_vd_open[otyp] &= ~parbit; 3032 } else { 3033 vdp->xdf_vd_open[otyp] &= ~parbit; 3034 } 3035 vdp->xdf_vd_exclopen &= ~parbit; 3036 3037 mutex_exit(&vdp->xdf_dev_lk); 3038 return (0); 3039 } 3040 3041 static int 3042 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3043 { 3044 minor_t minor; 3045 xdf_t *vdp; 3046 int part; 3047 ulong_t parbit; 3048 diskaddr_t p_blkct = 0; 3049 
boolean_t firstopen; 3050 boolean_t nodelay; 3051 3052 minor = getminor(*devp); 3053 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3054 return (ENXIO); 3055 3056 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3057 3058 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3059 3060 /* do cv_wait until connected or failed */ 3061 mutex_enter(&vdp->xdf_cb_lk); 3062 mutex_enter(&vdp->xdf_dev_lk); 3063 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3064 mutex_exit(&vdp->xdf_dev_lk); 3065 mutex_exit(&vdp->xdf_cb_lk); 3066 return (ENXIO); 3067 } 3068 mutex_exit(&vdp->xdf_cb_lk); 3069 3070 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3071 mutex_exit(&vdp->xdf_dev_lk); 3072 return (EROFS); 3073 } 3074 3075 part = XDF_PART(minor); 3076 parbit = 1 << part; 3077 if ((vdp->xdf_vd_exclopen & parbit) || 3078 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3079 mutex_exit(&vdp->xdf_dev_lk); 3080 return (EBUSY); 3081 } 3082 3083 /* are we the first one to open this node? */ 3084 firstopen = !xdf_isopen(vdp, -1); 3085 3086 if (otyp == OTYP_LYR) 3087 vdp->xdf_vd_lyropen[part]++; 3088 3089 vdp->xdf_vd_open[otyp] |= parbit; 3090 3091 if (flag & FEXCL) 3092 vdp->xdf_vd_exclopen |= parbit; 3093 3094 mutex_exit(&vdp->xdf_dev_lk); 3095 3096 /* force a re-validation */ 3097 if (firstopen) 3098 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3099 3100 /* If this is a non-blocking open then we're done */ 3101 if (nodelay) 3102 return (0); 3103 3104 /* 3105 * This is a blocking open, so we require: 3106 * - that the disk have a valid label on it 3107 * - that the size of the partition that we're opening is non-zero 3108 */ 3109 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3110 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3111 (void) xdf_close(*devp, flag, otyp, credp); 3112 return (ENXIO); 3113 } 3114 3115 return (0); 3116 } 3117 3118 /*ARGSUSED*/ 3119 static void 3120 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3121 { 3122 xdf_t *vdp = 
(xdf_t *)ddi_get_driver_private(dip); 3123 cv_broadcast(&vdp->xdf_hp_status_cv); 3124 } 3125 3126 static int 3127 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3128 char *name, caddr_t valuep, int *lengthp) 3129 { 3130 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3131 3132 /* 3133 * Sanity check that if a dev_t or dip were specified that they 3134 * correspond to this device driver. On debug kernels we'll 3135 * panic and on non-debug kernels we'll return failure. 3136 */ 3137 ASSERT(ddi_driver_major(dip) == xdf_major); 3138 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3139 if ((ddi_driver_major(dip) != xdf_major) || 3140 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3141 return (DDI_PROP_NOT_FOUND); 3142 3143 if (vdp == NULL) 3144 return (ddi_prop_op(dev, dip, prop_op, flags, 3145 name, valuep, lengthp)); 3146 3147 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3148 dev, dip, prop_op, flags, name, valuep, lengthp, 3149 XDF_PART(getminor(dev)), NULL)); 3150 } 3151 3152 /*ARGSUSED*/ 3153 static int 3154 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3155 { 3156 int instance = XDF_INST(getminor((dev_t)arg)); 3157 xdf_t *vbdp; 3158 3159 switch (cmd) { 3160 case DDI_INFO_DEVT2DEVINFO: 3161 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3162 *rp = NULL; 3163 return (DDI_FAILURE); 3164 } 3165 *rp = vbdp->xdf_dip; 3166 return (DDI_SUCCESS); 3167 3168 case DDI_INFO_DEVT2INSTANCE: 3169 *rp = (void *)(uintptr_t)instance; 3170 return (DDI_SUCCESS); 3171 3172 default: 3173 return (DDI_FAILURE); 3174 } 3175 } 3176 3177 /*ARGSUSED*/ 3178 static int 3179 xdf_resume(dev_info_t *dip) 3180 { 3181 xdf_t *vdp; 3182 char *oename; 3183 3184 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3185 goto err; 3186 3187 if (xdf_debug & SUSRES_DBG) 3188 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3189 3190 mutex_enter(&vdp->xdf_cb_lk); 3191 3192 if 
(xvdi_resume(dip) != DDI_SUCCESS) { 3193 mutex_exit(&vdp->xdf_cb_lk); 3194 goto err; 3195 } 3196 3197 if (((oename = xvdi_get_oename(dip)) == NULL) || 3198 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3199 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3200 mutex_exit(&vdp->xdf_cb_lk); 3201 goto err; 3202 } 3203 3204 mutex_enter(&vdp->xdf_dev_lk); 3205 ASSERT(vdp->xdf_state != XD_READY); 3206 xdf_set_state(vdp, XD_UNKNOWN); 3207 mutex_exit(&vdp->xdf_dev_lk); 3208 3209 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3210 mutex_exit(&vdp->xdf_cb_lk); 3211 goto err; 3212 } 3213 3214 mutex_exit(&vdp->xdf_cb_lk); 3215 3216 if (xdf_debug & SUSRES_DBG) 3217 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3218 return (DDI_SUCCESS); 3219 err: 3220 if (xdf_debug & SUSRES_DBG) 3221 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3222 return (DDI_FAILURE); 3223 } 3224 3225 static int 3226 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3227 { 3228 int n, instance = ddi_get_instance(dip); 3229 ddi_iblock_cookie_t ibc, softibc; 3230 boolean_t dev_iscd = B_FALSE; 3231 xdf_t *vdp; 3232 char *oename, *xsname, *str; 3233 3234 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3235 "xdf_debug", 0)) != 0) 3236 xdf_debug = n; 3237 3238 switch (cmd) { 3239 case DDI_RESUME: 3240 return (xdf_resume(dip)); 3241 case DDI_ATTACH: 3242 break; 3243 default: 3244 return (DDI_FAILURE); 3245 } 3246 /* DDI_ATTACH */ 3247 3248 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 3249 ((oename = xvdi_get_oename(dip)) == NULL)) 3250 return (DDI_FAILURE); 3251 3252 /* 3253 * Disable auto-detach. This is necessary so that we don't get 3254 * detached while we're disconnected from the back end. 
*/
	if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS))
		return (DDI_FAILURE);

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip,
	    DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS)
		return (DDI_FAILURE);

	/* ibc is used to initialize the driver mutexes below */
	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (ddi_get_soft_iblock_cookie(dip,
	    DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* find out from xenbus whether we are a cd device */
	if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
		cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
		    ddi_get_name_addr(dip));
		return (DDI_FAILURE);
	}
	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
		dev_iscd = B_TRUE;
	strfree(str);

	if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip)));
	vdp = ddi_get_soft_state(xdf_ssp, instance);
	ddi_set_driver_private(dip, vdp);
	vdp->xdf_dip = dip;
	vdp->xdf_addr = ddi_get_name_addr(dip);
	vdp->xdf_suspending = B_FALSE;
	vdp->xdf_media_req_supported = B_FALSE;
	vdp->xdf_peer = INVALID_DOMID;
	vdp->xdf_evtchn = INVALID_EVTCHN;
	list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
	    offsetof(v_req_t, v_link));
	cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc);
	vdp->xdf_cmbl_reattach = B_TRUE;
	/* a cd starts out ejected; other devices have no media state */
	if (dev_iscd) {
		vdp->xdf_dinfo |= VDISK_CDROM;
		vdp->xdf_mstate = DKIO_EJECTED;
	} else {
		vdp->xdf_mstate = DKIO_NONE;
	}

	if
((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq",
	    1, TASKQ_DEFAULTPRI, 0)) == NULL)
		goto errout0;

	/* get notified when the backend "hotplug-status" changes */
	if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
	    xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)
		goto errout0;

	if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
	    &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
		    ddi_get_name_addr(dip));
		goto errout0;
	}

	/*
	 * Initialize the physical geometry structure.  Note that currently
	 * we don't know the size of the backend device so the number
	 * of blocks on the device will be initialized to zero.  Once
	 * we connect to the backend device we'll update the physical
	 * geometry to reflect the real size of the device.
	 */
	xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom);
	vdp->xdf_pgeom_fixed = B_FALSE;

	/*
	 * create default device minor nodes: non-removable disk
	 * we will adjust minor nodes after we are connected w/ backend
	 */
	cmlb_alloc_handle(&vdp->xdf_vd_lbl);
	if (xdf_cmlb_attach(vdp) != 0) {
		cmn_err(CE_WARN,
		    "xdf@%s: attach failed, cmlb attach failed",
		    ddi_get_name_addr(dip));
		goto errout0;
	}

	/*
	 * We ship with cache-enabled disks
	 */
	vdp->xdf_wce = B_TRUE;

	mutex_enter(&vdp->xdf_cb_lk);
	/* Watch backend XenbusState change */
	if (xvdi_add_event_handler(dip,
	    XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) {
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout0;
	}

	/* from here on the failure path is errout1 */
	if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdf@%s: start connection failed",
		    ddi_get_name_addr(dip));
		mutex_exit(&vdp->xdf_cb_lk);
		goto errout1;
	}
	mutex_exit(&vdp->xdf_cb_lk);

#if defined(XPV_HVM_DRIVER)

	xdf_hvm_add(dip);

	/* Report our version to dom0.
*/ 3372 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3373 HVMPV_XDF_VERS)) 3374 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3375 3376 #endif /* XPV_HVM_DRIVER */ 3377 3378 /* create kstat for iostat(1M) */ 3379 if (xdf_kstat_create(dip, "xdf", instance) != 0) { 3380 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3381 ddi_get_name_addr(dip)); 3382 goto errout1; 3383 } 3384 3385 3386 ddi_report_dev(dip); 3387 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3388 return (DDI_SUCCESS); 3389 3390 errout1: 3391 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3392 xvdi_remove_event_handler(dip, XS_OE_STATE); 3393 errout0: 3394 if (vdp->xdf_vd_lbl != NULL) { 3395 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3396 cmlb_free_handle(&vdp->xdf_vd_lbl); 3397 vdp->xdf_vd_lbl = NULL; 3398 } 3399 if (vdp->xdf_softintr_id != NULL) 3400 ddi_remove_softintr(vdp->xdf_softintr_id); 3401 xvdi_remove_xb_watch_handlers(dip); 3402 if (vdp->xdf_ready_tq != NULL) 3403 ddi_taskq_destroy(vdp->xdf_ready_tq); 3404 mutex_destroy(&vdp->xdf_cb_lk); 3405 mutex_destroy(&vdp->xdf_dev_lk); 3406 cv_destroy(&vdp->xdf_dev_cv); 3407 cv_destroy(&vdp->xdf_hp_status_cv); 3408 ddi_soft_state_free(xdf_ssp, instance); 3409 ddi_set_driver_private(dip, NULL); 3410 ddi_prop_remove_all(dip); 3411 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3412 return (DDI_FAILURE); 3413 } 3414 3415 static int 3416 xdf_suspend(dev_info_t *dip) 3417 { 3418 int instance = ddi_get_instance(dip); 3419 xdf_t *vdp; 3420 3421 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3422 return (DDI_FAILURE); 3423 3424 if (xdf_debug & SUSRES_DBG) 3425 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3426 3427 xvdi_suspend(dip); 3428 3429 mutex_enter(&vdp->xdf_cb_lk); 3430 mutex_enter(&vdp->xdf_dev_lk); 3431 3432 vdp->xdf_suspending = B_TRUE; 3433 xdf_ring_destroy(vdp); 3434 xdf_set_state(vdp, XD_SUSPEND); 3435 vdp->xdf_suspending = B_FALSE; 3436 3437 
mutex_exit(&vdp->xdf_dev_lk); 3438 mutex_exit(&vdp->xdf_cb_lk); 3439 3440 if (xdf_debug & SUSRES_DBG) 3441 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3442 3443 return (DDI_SUCCESS); 3444 } 3445 3446 static int 3447 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3448 { 3449 xdf_t *vdp; 3450 int instance; 3451 3452 switch (cmd) { 3453 3454 case DDI_PM_SUSPEND: 3455 break; 3456 3457 case DDI_SUSPEND: 3458 return (xdf_suspend(dip)); 3459 3460 case DDI_DETACH: 3461 break; 3462 3463 default: 3464 return (DDI_FAILURE); 3465 } 3466 3467 instance = ddi_get_instance(dip); 3468 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3469 vdp = ddi_get_soft_state(xdf_ssp, instance); 3470 3471 if (vdp == NULL) 3472 return (DDI_FAILURE); 3473 3474 mutex_enter(&vdp->xdf_cb_lk); 3475 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3476 if (vdp->xdf_state != XD_CLOSED) { 3477 mutex_exit(&vdp->xdf_cb_lk); 3478 return (DDI_FAILURE); 3479 } 3480 mutex_exit(&vdp->xdf_cb_lk); 3481 3482 ASSERT(!ISDMACBON(vdp)); 3483 3484 #if defined(XPV_HVM_DRIVER) 3485 xdf_hvm_rm(dip); 3486 #endif /* XPV_HVM_DRIVER */ 3487 3488 if (vdp->xdf_timeout_id != 0) 3489 (void) untimeout(vdp->xdf_timeout_id); 3490 3491 xvdi_remove_event_handler(dip, XS_OE_STATE); 3492 ddi_taskq_destroy(vdp->xdf_ready_tq); 3493 3494 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3495 cmlb_free_handle(&vdp->xdf_vd_lbl); 3496 3497 /* we'll support backend running in domU later */ 3498 #ifdef DOMU_BACKEND 3499 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3500 #endif 3501 3502 list_destroy(&vdp->xdf_vreq_act); 3503 ddi_prop_remove_all(dip); 3504 xdf_kstat_delete(dip); 3505 ddi_remove_softintr(vdp->xdf_softintr_id); 3506 xvdi_remove_xb_watch_handlers(dip); 3507 ddi_set_driver_private(dip, NULL); 3508 cv_destroy(&vdp->xdf_dev_cv); 3509 mutex_destroy(&vdp->xdf_cb_lk); 3510 mutex_destroy(&vdp->xdf_dev_lk); 3511 if (vdp->xdf_cache_flush_block != NULL) 3512 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize); 3513 
ddi_soft_state_free(xdf_ssp, instance); 3514 return (DDI_SUCCESS); 3515 } 3516 3517 /* 3518 * Driver linkage structures. 3519 */ 3520 static struct cb_ops xdf_cbops = { 3521 xdf_open, 3522 xdf_close, 3523 xdf_strategy, 3524 nodev, 3525 xdf_dump, 3526 xdf_read, 3527 xdf_write, 3528 xdf_ioctl, 3529 nodev, 3530 nodev, 3531 nodev, 3532 nochpoll, 3533 xdf_prop_op, 3534 NULL, 3535 D_MP | D_NEW | D_64BIT, 3536 CB_REV, 3537 xdf_aread, 3538 xdf_awrite 3539 }; 3540 3541 struct dev_ops xdf_devops = { 3542 DEVO_REV, /* devo_rev */ 3543 0, /* devo_refcnt */ 3544 xdf_getinfo, /* devo_getinfo */ 3545 nulldev, /* devo_identify */ 3546 nulldev, /* devo_probe */ 3547 xdf_attach, /* devo_attach */ 3548 xdf_detach, /* devo_detach */ 3549 nodev, /* devo_reset */ 3550 &xdf_cbops, /* devo_cb_ops */ 3551 NULL, /* devo_bus_ops */ 3552 NULL, /* devo_power */ 3553 ddi_quiesce_not_supported, /* devo_quiesce */ 3554 }; 3555 3556 /* 3557 * Module linkage structures. 3558 */ 3559 static struct modldrv modldrv = { 3560 &mod_driverops, /* Type of module. 
This one is a driver */ 3561 "virtual block driver", /* short description */ 3562 &xdf_devops /* driver specific ops */ 3563 }; 3564 3565 static struct modlinkage xdf_modlinkage = { 3566 MODREV_1, (void *)&modldrv, NULL 3567 }; 3568 3569 /* 3570 * standard module entry points 3571 */ 3572 int 3573 _init(void) 3574 { 3575 int rc; 3576 3577 xdf_major = ddi_name_to_major("xdf"); 3578 if (xdf_major == (major_t)-1) 3579 return (EINVAL); 3580 3581 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3582 return (rc); 3583 3584 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3585 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3586 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3587 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3588 3589 #if defined(XPV_HVM_DRIVER) 3590 xdf_hvm_init(); 3591 #endif /* XPV_HVM_DRIVER */ 3592 3593 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3594 #if defined(XPV_HVM_DRIVER) 3595 xdf_hvm_fini(); 3596 #endif /* XPV_HVM_DRIVER */ 3597 kmem_cache_destroy(xdf_vreq_cache); 3598 kmem_cache_destroy(xdf_gs_cache); 3599 ddi_soft_state_fini(&xdf_ssp); 3600 return (rc); 3601 } 3602 3603 return (rc); 3604 } 3605 3606 int 3607 _fini(void) 3608 { 3609 int err; 3610 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3611 return (err); 3612 3613 #if defined(XPV_HVM_DRIVER) 3614 xdf_hvm_fini(); 3615 #endif /* XPV_HVM_DRIVER */ 3616 3617 kmem_cache_destroy(xdf_vreq_cache); 3618 kmem_cache_destroy(xdf_gs_cache); 3619 ddi_soft_state_fini(&xdf_ssp); 3620 3621 return (0); 3622 } 3623 3624 int 3625 _info(struct modinfo *modinfop) 3626 { 3627 return (mod_info(&xdf_modlinkage, modinfop)); 3628 } 3629