/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports Solaris disk device nodes, accepts IO requests from
 * those nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).
 * Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notification of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
 */

#include <io/xdf.h>

#include <sys/conf.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
#else /* !XPV_HVM_DRIVER */
#include <sys/evtchn_impl.h>
#endif /* !XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <sys/fdio.h>
#include <sys/cdio.h>

/*
 * DEBUG_EVAL can be used to include debug only statements without
 * having to use '#ifdef DEBUG' statements
 */
#ifdef DEBUG
#define	DEBUG_EVAL(x)	(x)
#else /* !DEBUG */
#define	DEBUG_EVAL(x)
#endif /* !DEBUG */

/*
 * Ring drain polling parameters.
 *
 * NOTE(review): despite the "MSEC" in its name, XDF_DRAIN_MSEC_DELAY is
 * handed to drv_usectohz() by xdf_ring_drain_locked(), i.e. the value is
 * in microseconds (50*1000 usec == 0.05 sec).  XDF_DRAIN_RETRY_COUNT
 * polls at that interval give the 10 second total noted below.
 */
#define	XDF_DRAIN_MSEC_DELAY		(50*1000)	/* 00.05 sec */
#define	XDF_DRAIN_RETRY_COUNT		200		/* 10.00 sec */

#define	INVALID_DOMID	((domid_t)-1)

/* values stored in v_req_t.v_flush_diskcache */
#define	FLUSH_DISKCACHE	0x1
#define	WRITE_BARRIER	0x2

#define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */

/*
 * Both flush mechanisms require xdf_feature_barrier; xdf_flush_supported
 * selects between a write barrier (not set) and a cache flush (set).
 */
#define	USE_WRITE_BARRIER(vdp)						\
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp)					\
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)

/*
 * A buf is a write barrier if it is a write, write barriers are in use,
 * and its data pointer is the driver's own xdf_cache_flush_block buffer.
 */
#define	IS_WRITE_BARRIER(vdp, bp)					\
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&			\
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))

/*
 * A buf is a cache flush if it is a zero-length write and cache flushes
 * are in use.
 *
 * NOTE(review): this macro expands a reference to `vdp' even though `vdp'
 * is not one of its parameters, so an xdf_t pointer named vdp must be in
 * scope wherever it is used.
 */
#define	IS_FLUSH_DISKCACHE(bp)						\
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

/*
 * A vreq is finished once its current DMA window is done and either it is
 * a cache flush request or that window was the last one.
 */
#define	VREQ_DONE(vreq)							\
	VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&		\
	    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||		\
	    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))

/* The v_req_t associated with a buf is kept in the buf's av_back field. */
#define	BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
#define	BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))

extern int	do_polled_io;

/* run-time tunables that we don't want the compiler to
 * optimize away */
volatile int	xdf_debug = 0;
volatile boolean_t	xdf_barrier_flush_disable = B_FALSE;

/* per module globals */
major_t		xdf_major;
static void	*xdf_ssp;
static kmem_cache_t	*xdf_vreq_cache;	/* cache of v_req_t */
static kmem_cache_t	*xdf_gs_cache;		/* cache of ge_slot_t */
static int	xdf_maxphys = XB_MAXPHYS;	/* upper bound used by xdfmin() */
static diskaddr_t	xdf_flush_block = DEFAULT_FLUSH_BLOCK;
static int	xdf_fbrewrites;	/* flush block re-write count */

/* misc public functions (used by xdf_shell.c) */
int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

/* misc private functions */
static void xdf_io_start(xdf_t *);

/* callbacks from common label */
static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};

/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length  */
	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
	XB_BSIZE,			/* granularity */
	0,				/* flags (reserved) */
};

/* access attributes used when allocating the bounce buffer in vreq_setup() */
static ddi_device_acc_attr_t xc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * timeout(9F) handler armed by gs_get() and vreq_get() when a
 * kmem_cache_alloc() fails.  Clears the pending timeout id under
 * xdf_dev_lk and then restarts I/O processing.
 */
static void
xdf_timeout_handler(void *arg)
{
	xdf_t *vdp = arg;

	mutex_enter(&vdp->xdf_dev_lk);
	vdp->xdf_timeout_id = 0;
	mutex_exit(&vdp->xdf_dev_lk);

	/* new timeout thread could be re-scheduled */
	xdf_io_start(vdp);
}

/*
 * callback
 * func when DMA/GTE resources are available
 *
 * Note: we only register one callback function to grant table subsystem
 * since we only have one 'struct gnttab_free_callback' in xdf_t.
 *
 * Triggers the driver's soft interrupt and always returns
 * DDI_DMA_CALLBACK_DONE.
 */
static int
xdf_dmacallback(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	ASSERT(vdp != NULL);

	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
	    vdp->xdf_addr));

	ddi_trigger_softintr(vdp->xdf_softintr_id);
	return (DDI_DMA_CALLBACK_DONE);
}

/*
 * Allocate a ge_slot_t together with a batch of
 * BLKIF_MAX_SEGMENTS_PER_REQUEST grant references.
 *
 * Returns the initialized slot (gs_ngrefs == 0), or NULL on failure:
 *   - if the grant references can't be allocated, a grant table free
 *     callback (xdf_dmacallback) is requested unless one is already
 *     registered;
 *   - if the slot itself can't be allocated, the grant references are
 *     released and a timeout is armed (unless one is already pending) to
 *     restart I/O.
 */
static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
	grant_ref_t gh;
	ge_slot_t *gs;

	/* try to alloc GTEs needed in this slot, first */
	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
		if (vdp->xdf_gnt_callback.next == NULL) {
			SETDMACBON(vdp);
			gnttab_request_free_callback(
			    &vdp->xdf_gnt_callback,
			    (void (*)(void *))xdf_dmacallback,
			    (void *)vdp,
			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		}
		return (NULL);
	}

	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
	if (gs == NULL) {
		gnttab_free_grant_references(gh);
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}

	/* init gs_slot */
	gs->gs_oeid = vdp->xdf_peer;
	gs->gs_isread = isread;
	gs->gs_ghead = gh;
	gs->gs_ngrefs = 0;

	return (gs);
}

/*
 * Release a ge_slot_t: end foreign access on every grant reference the
 * slot handed out, free the slot's grant reference batch, unlink the slot
 * from its vreq's v_gs list and return it to the slot cache.
 */
static void
gs_free(ge_slot_t *gs)
{
	int		i;

	/* release all grant table entry resources used in this slot */
	for (i = 0; i < gs->gs_ngrefs; i++)
		gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
	gnttab_free_grant_references(gs->gs_ghead);
	list_remove(&gs->gs_vreq->v_gs, gs);
	kmem_cache_free(xdf_gs_cache, gs);
}

/*
 * Claim the next grant reference from the slot's batch, record it in
 * gs_ge[] and grant the slot's peer domain (gs_oeid) access to mfn.
 * NOTE(review): the last argument (!gs_isread) is presumably the
 * read-only flag of the grant -- confirm against the gnttab interface.
 *
 * Returns the grant reference that was claimed.
 */
static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
	grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);

	ASSERT(gr != -1);
	ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
	gs->gs_ge[gs->gs_ngrefs++] = gr;
	gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);

	return (gr);
}

/*
 * Alloc a vreq for this bp
 * bp->av_back contains the pointer to the vreq upon return
 *
 * The new vreq is zeroed, set to VREQ_INIT and inserted at the head of
 * xdf_vreq_act.  Returns NULL if the allocation fails, in which case a
 * timeout is armed (unless one is already pending) to restart I/O.
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = NULL;

	ASSERT(BP_VREQ(bp) == NULL);

	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
	if (vreq == NULL) {
		if (vdp->xdf_timeout_id == 0)
			/* restart I/O after one second */
			vdp->xdf_timeout_id =
			    timeout(xdf_timeout_handler, vdp, hz);
		return (NULL);
	}
	bzero(vreq, sizeof (v_req_t));
	list_create(&vreq->v_gs, sizeof (ge_slot_t),
	    offsetof(ge_slot_t, gs_vreq_link));
	vreq->v_buf = bp;
	vreq->v_status = VREQ_INIT;
	vreq->v_runq = B_FALSE;
	BP_VREQ_SET(bp, vreq);
	/* init of other fields in vreq is up to the caller */

	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);

	return (vreq);
}

/*
 * Tear down a vreq: unlink it from xdf_vreq_act, release whatever DMA
 * resources it holds, and return it to the vreq cache.
 *
 * The switch is entered at the stage recorded in v_status and falls
 * through, undoing each earlier stage of vreq_setup() in turn.  For an
 * unaligned read that completed without error, the data is copied from
 * the bounce buffer (v_abuf) back into the buf before the bounce buffer
 * is freed.  Cache flush requests skip the DMA teardown entirely.
 *
 * The caller must hold xdf_dev_lk, and the vreq must not be on the runq.
 * This function does not clear the buf's back pointer to the vreq.
 */
static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
	buf_t	*bp = vreq->v_buf;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(BP_VREQ(bp) == vreq);

	list_remove(&vdp->xdf_vreq_act, vreq);

	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
		goto done;

	switch (vreq->v_status) {
	case VREQ_DMAWIN_DONE:
	case VREQ_GS_ALLOCED:
	case VREQ_DMABUF_BOUND:
		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
		/*FALLTHRU*/
	case VREQ_DMAMEM_ALLOCED:
		if (!ALIGNED_XFER(bp)) {
			ASSERT(vreq->v_abuf != NULL);
			if (!IS_ERROR(bp) && IS_READ(bp))
				bcopy(vreq->v_abuf, bp->b_un.b_addr,
				    bp->b_bcount);
			ddi_dma_mem_free(&vreq->v_align);
		}
		/*FALLTHRU*/
	case VREQ_MEMDMAHDL_ALLOCED:
		if (!ALIGNED_XFER(bp))
			ddi_dma_free_handle(&vreq->v_memdmahdl);
		/*FALLTHRU*/
	case VREQ_DMAHDL_ALLOCED:
		ddi_dma_free_handle(&vreq->v_dmahdl);
		break;
	default:
		break;
	}
done:
	ASSERT(!vreq->v_runq);
	list_destroy(&vreq->v_gs);
	kmem_cache_free(xdf_vreq_cache, vreq);
}

/*
 * Snarf new data if our flush block was re-written
 *
 * If the write described by bp, which starts at block blkno, covers
 * xdf_flush_block, copy the DEV_BSIZE bytes destined for that block into
 * vdp->xdf_cache_flush_block and bump xdf_fbrewrites.  B_PAGEIO/B_PHYS
 * bufs are mapped in for the copy and mapped out again afterwards.
 */
static void
check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
{
	int	nblks;
	boolean_t mapin;

	if (IS_WRITE_BARRIER(vdp, bp))
		return; /* write was a flush write */

	mapin = B_FALSE;
	nblks = bp->b_bcount >> DEV_BSHIFT;
	if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
		xdf_fbrewrites++;
		if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
			mapin = B_TRUE;
			bp_mapin(bp);
		}
		bcopy(bp->b_un.b_addr +
		    ((xdf_flush_block - blkno) << DEV_BSHIFT),
		    vdp->xdf_cache_flush_block, DEV_BSIZE);
		if (mapin)
			bp_mapout(bp);
	}
}

/*
 * Initialize the DMA and grant table resources for the buf
 *
 * This is a resumable state machine keyed on vreq->v_status.  Each case
 * acquires the next resource, records the new stage in v_status and falls
 * through to the following stage.  When a resource can't be obtained the
 * function returns DDI_FAILURE with v_status left at the last completed
 * stage, so a later call re-enters the switch at that point.
 *
 * Returns DDI_SUCCESS once the vreq is in VREQ_GS_ALLOCED (i.e. it has a
 * ge_slot for its current DMA window), DDI_FAILURE otherwise.
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
	int rc;
	ddi_dma_attr_t dmaattr;
	uint_t ndcs, ndws;
	ddi_dma_handle_t dh;
	ddi_dma_handle_t mdh;
	ddi_dma_cookie_t dc;
	ddi_acc_handle_t abh;
	caddr_t	aba;
	ge_slot_t *gs;
	size_t bufsz;
	off_t off;
	size_t sz;
	buf_t *bp = vreq->v_buf;
	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

	switch (vreq->v_status) {
	case VREQ_INIT:
		/* a cache flush carries no data: it only needs a ge_slot */
		if (IS_FLUSH_DISKCACHE(bp)) {
			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
				DPRINTF(DMA_DBG, ("xdf@%s: "
				    "get ge_slotfailed\n", vdp->xdf_addr));
				return (DDI_FAILURE);
			}
			vreq->v_blkno = 0;
			vreq->v_nslots = 1;
			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
			vreq->v_status = VREQ_GS_ALLOCED;
			gs->gs_vreq = vreq;
			list_insert_head(&vreq->v_gs, gs);
			return (DDI_SUCCESS);
		}

		if (IS_WRITE_BARRIER(vdp, bp))
			vreq->v_flush_diskcache = WRITE_BARRIER;
		/* b_private holds a block offset that is added to b_blkno */
		vreq->v_blkno = bp->b_blkno +
		    (diskaddr_t)(uintptr_t)bp->b_private;
		/* See if we wrote new data to our flush block */
		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
			check_fbwrite(vdp, bp, vreq->v_blkno);
		vreq->v_status = VREQ_INIT_DONE;
		/*FALLTHRU*/

	case VREQ_INIT_DONE:
		/*
		 * alloc DMA handle
		 */
		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
		    xdf_dmacallback, (caddr_t)vdp, &dh);
		if (rc != DDI_SUCCESS) {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_dmahdl = dh;
		vreq->v_status = VREQ_DMAHDL_ALLOCED;
		/*FALLTHRU*/

	case VREQ_DMAHDL_ALLOCED:
		/*
		 * alloc dma handle for 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			/*
			 * XXPV: we need to temporarily enlarge the seg
			 * boundary and s/g length to work round CR6381968
			 */
			dmaattr = xb_dma_attr;
			dmaattr.dma_attr_seg = (uint64_t)-1;
			dmaattr.dma_attr_sgllen = INT_MAX;
			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
			    xdf_dmacallback, (caddr_t)vdp, &mdh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: "
				    "unaligned buf DMAhandle alloc failed\n",
				    vdp->xdf_addr));
				return (DDI_FAILURE);
			}
			vreq->v_memdmahdl = mdh;
			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
		}
		/*FALLTHRU*/

	case VREQ_MEMDMAHDL_ALLOCED:
		/*
		 * alloc 512-byte aligned buf
		 */
		if (!ALIGNED_XFER(bp)) {
			if (bp->b_flags & (B_PAGEIO | B_PHYS))
				bp_mapin(bp);
			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
			    &aba, &bufsz, &abh);
			if (rc != DDI_SUCCESS) {
				SETDMACBON(vdp);
				DPRINTF(DMA_DBG, ("xdf@%s: "
				    "DMA mem allocation failed\n",
				    vdp->xdf_addr));
				return (DDI_FAILURE);
			}

			vreq->v_abuf = aba;
			vreq->v_align = abh;
			vreq->v_status = VREQ_DMAMEM_ALLOCED;

			ASSERT(bufsz >= bp->b_bcount);
			/* for writes, stage the caller's data in the copy */
			if (!IS_READ(bp))
				bcopy(bp->b_un.b_addr, vreq->v_abuf,
				    bp->b_bcount);
		}
		/*FALLTHRU*/

	case VREQ_DMAMEM_ALLOCED:
		/*
		 * dma bind
		 */
		if (ALIGNED_XFER(bp)) {
			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
			    &dc, &ndcs);
		} else {
			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
		}
		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
			/* get num of dma windows */
			if (rc == DDI_DMA_PARTIAL_MAP) {
				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
				ASSERT(rc == DDI_SUCCESS);
			} else {
				ndws = 1;
			}
		} else {
			SETDMACBON(vdp);
			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_dmac = dc;
		vreq->v_dmaw = 0;
		vreq->v_ndmacs = ndcs;
		vreq->v_ndmaws = ndws;
		/* one ge_slot is used per DMA window */
		vreq->v_nslots = ndws;
		vreq->v_status = VREQ_DMABUF_BOUND;
		/*FALLTHRU*/

	case VREQ_DMABUF_BOUND:
		/*
		 * get ge_slot, callback is set upon failure from gs_get(),
		 * if not set previously
		 */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_status = VREQ_GS_ALLOCED;
		gs->gs_vreq = vreq;
		list_insert_head(&vreq->v_gs, gs);
		break;

	case VREQ_GS_ALLOCED:
		/* nothing need to be done */
		break;

	case VREQ_DMAWIN_DONE:
		/*
		 * move to the next dma window
		 */
		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

		/* get a ge_slot for this DMA window */
		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
			    vdp->xdf_addr));
			return (DDI_FAILURE);
		}

		vreq->v_dmaw++;
		VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
		    &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
		vreq->v_status = VREQ_GS_ALLOCED;
		gs->gs_vreq = vreq;
		list_insert_head(&vreq->v_gs, gs);
		break;

	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

/*
 * Attach this device to the common disk label (cmlb) module, using
 * xdf_lb_ops as the target operations.  The device type and node type
 * depend on whether the device is a CD (XD_IS_CD), and the flags passed
 * differ between the XPV_HVM_DRIVER build and the non-HVM build.
 * Returns whatever cmlb_attach() returns.
 */
static int
xdf_cmlb_attach(xdf_t *vdp)
{
	dev_info_t	*dip = vdp->xdf_dip;

	return (cmlb_attach(dip, &xdf_lb_ops,
	    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
	    XD_IS_RM(vdp),
	    B_TRUE,
	    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
	    (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) |
	    CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
	    XD_IS_CD(vdp) ? 0 : CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
	    vdp->xdf_vd_lbl, NULL));
}

/*
 * Complete a buf with an error.  When resid is 0 the whole request
 * (b_bcount) is reported as not transferred; for any other resid value
 * b_resid is left as the caller set it.
 */
static void
xdf_io_err(buf_t *bp, int err, size_t resid)
{
	bioerror(bp, err);
	if (resid == 0)
		bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * Account for bp entering the kstat queues: the runq if its vreq is
 * marked v_runq, the waitq otherwise.  A no-op if no kstat exists.
 * The caller must hold xdf_dev_lk.
 */
static void
xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_xdev_iostat == NULL)
		return;
	if ((vreq != NULL) && vreq->v_runq) {
		kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	} else {
		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	}
}

/*
 * Account for bp leaving the kstat queues: the runq if its vreq is
 * marked v_runq, the waitq otherwise.  A no-op if no kstat exists.
 * The caller must hold xdf_dev_lk.
 */
static void
xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_xdev_iostat == NULL)
		return;
	if ((vreq != NULL) && vreq->v_runq) {
		kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	} else {
		kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
	}
}

/*
 * Mark bp's vreq as being on the runq and, if a kstat exists, move the
 * accounting from the waitq to the runq.  v_runq is updated even when
 * there is no kstat.  The caller must hold xdf_dev_lk.
 */
static void
xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(!vreq->v_runq);

	vreq->v_runq = B_TRUE;
	if (vdp->xdf_xdev_iostat == NULL)
		return;
	kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}

/*
 * Mark bp's vreq as no longer being on the runq and, if a kstat exists,
 * move the accounting from the runq back to the waitq.  v_runq is updated
 * even when there is no kstat.  The caller must hold xdf_dev_lk.
 */
static void
xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
{
	v_req_t *vreq = BP_VREQ(bp);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(vreq->v_runq);

	vreq->v_runq = B_FALSE;
	if (vdp->xdf_xdev_iostat == NULL)
		return;
	kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}

/*
 * Create and install the IO kstat for this device and seed its queue
 * counts from the IOs that are currently outstanding.
 *
 * Returns 0 on success, or -1 if the kstat can't be created or if the
 * device already has a kstat.
 */
int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance)
{
	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t		*kstat;
	buf_t		*bp;

	if ((kstat = kstat_create(
	    ks_module, instance, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
		return (-1);

	/* See comment about locking in xdf_kstat_delete(). */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	/* only one kstat can exist at a time */
	if (vdp->xdf_xdev_iostat != NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		kstat_delete(kstat);
		return (-1);
	}

	vdp->xdf_xdev_iostat = kstat;
	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
	kstat_install(vdp->xdf_xdev_iostat);

	/*
	 * Now that we've created a kstat, we need to update the waitq and
	 * runq counts for the kstat to reflect our current state.
	 *
	 * For a buf_t structure to be on the runq, it must have a ring
	 * buffer slot associated with it.  To get a ring buffer slot the
	 * buf must first have a v_req_t and a ge_slot_t associated with it.
	 * Then when it is granted a ring buffer slot, v_runq will be set to
	 * true.
	 *
	 * For a buf_t structure to be on the waitq, it must not be on the
	 * runq.  So to find all the buf_t's that should be on waitq, we
	 * walk the active buf list and add any buf_t's which aren't on the
	 * runq to the waitq.
	 */
	bp = vdp->xdf_f_act;
	while (bp != NULL) {
		xdf_kstat_enter(vdp, bp);
		bp = bp->av_forw;
	}
	if (vdp->xdf_ready_tq_bp != NULL)
		xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);

	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_iostat_lk);
	return (0);
}

/*
 * Remove and destroy the IO kstat for this device, if it has one.
 */
void
xdf_kstat_delete(dev_info_t *dip)
{
	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
	kstat_t		*kstat;
	buf_t		*bp;

	/*
	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
	 * and the contents of our kstat.  xdf_iostat_lk is used
	 * to protect the allocation and freeing of the actual kstat.
	 * xdf_dev_lk can't be used for this purpose because kstat
	 * readers use it to access the contents of the kstat and
	 * hence it can't be held when calling kstat_delete().
	 */
	mutex_enter(&vdp->xdf_iostat_lk);
	mutex_enter(&vdp->xdf_dev_lk);

	if (vdp->xdf_xdev_iostat == NULL) {
		mutex_exit(&vdp->xdf_dev_lk);
		mutex_exit(&vdp->xdf_iostat_lk);
		return;
	}

	/*
	 * We're about to destroy the kstat structures, so it isn't really
	 * necessary to update the runq and waitq counts.  But, since this
	 * isn't a hot code path we can afford to be a little pedantic and
	 * go ahead and decrement the runq and waitq kstat counters to zero
	 * before free'ing them.  This helps us ensure that we've gotten all
	 * our accounting correct.
	 *
	 * For an explanation of how we determine which buffers go on the
	 * runq vs which go on the waitq, see the comments in
	 * xdf_kstat_create().
	 */
	bp = vdp->xdf_f_act;
	while (bp != NULL) {
		xdf_kstat_exit(vdp, bp);
		bp = bp->av_forw;
	}
	if (vdp->xdf_ready_tq_bp != NULL)
		xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);

	/* detach the kstat under xdf_dev_lk, but delete it after dropping */
	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}

/*
 * Add an IO request onto the active queue.
 *
 * We have to detect IOs generated by xdf_ready_tq_thread.  These IOs
 * are used to establish a connection to the backend, so they receive
 * priority over all other IOs.  Since xdf_ready_tq_thread only does
 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
 * given time and we record the buf associated with that request in
 * xdf_ready_tq_bp.
793 */ 794 static void 795 xdf_bp_push(xdf_t *vdp, buf_t *bp) 796 { 797 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 798 ASSERT(bp->av_forw == NULL); 799 800 xdf_kstat_enter(vdp, bp); 801 802 if (curthread == vdp->xdf_ready_tq_thread) { 803 /* new IO requests from the ready thread */ 804 ASSERT(vdp->xdf_ready_tq_bp == NULL); 805 vdp->xdf_ready_tq_bp = bp; 806 return; 807 } 808 809 /* this is normal IO request */ 810 ASSERT(bp != vdp->xdf_ready_tq_bp); 811 812 if (vdp->xdf_f_act == NULL) { 813 /* this is only only IO on the active queue */ 814 ASSERT(vdp->xdf_l_act == NULL); 815 ASSERT(vdp->xdf_i_act == NULL); 816 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; 817 return; 818 } 819 820 /* add this IO to the tail of the active queue */ 821 vdp->xdf_l_act->av_forw = bp; 822 vdp->xdf_l_act = bp; 823 if (vdp->xdf_i_act == NULL) 824 vdp->xdf_i_act = bp; 825 } 826 827 static void 828 xdf_bp_pop(xdf_t *vdp, buf_t *bp) 829 { 830 buf_t *bp_iter; 831 832 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 833 ASSERT(VREQ_DONE(BP_VREQ(bp))); 834 835 if (vdp->xdf_ready_tq_bp == bp) { 836 /* we're done with a ready thread IO request */ 837 ASSERT(bp->av_forw == NULL); 838 vdp->xdf_ready_tq_bp = NULL; 839 return; 840 } 841 842 /* we're done with a normal IO request */ 843 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); 844 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act)); 845 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); 846 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); 847 848 if (bp == vdp->xdf_f_act) { 849 /* This IO was at the head of our active queue. */ 850 vdp->xdf_f_act = bp->av_forw; 851 if (bp == vdp->xdf_l_act) 852 vdp->xdf_l_act = NULL; 853 } else { 854 /* There IO finished before some other pending IOs. 
*/ 855 bp_iter = vdp->xdf_f_act; 856 while (bp != bp_iter->av_forw) { 857 bp_iter = bp_iter->av_forw; 858 ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); 859 ASSERT(bp_iter != vdp->xdf_i_act); 860 } 861 bp_iter->av_forw = bp->av_forw; 862 if (bp == vdp->xdf_l_act) 863 vdp->xdf_l_act = bp_iter; 864 } 865 bp->av_forw = NULL; 866 } 867 868 static buf_t * 869 xdf_bp_next(xdf_t *vdp) 870 { 871 v_req_t *vreq; 872 buf_t *bp; 873 874 if (vdp->xdf_state == XD_CONNECTED) { 875 /* 876 * If we're in the XD_CONNECTED state, we only service IOs 877 * from the xdf_ready_tq_thread thread. 878 */ 879 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 880 return (NULL); 881 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 882 return (bp); 883 return (NULL); 884 } 885 886 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 887 if (vdp->xdf_state != XD_READY) 888 return (NULL); 889 890 ASSERT(vdp->xdf_ready_tq_bp == NULL); 891 for (;;) { 892 if ((bp = vdp->xdf_i_act) == NULL) 893 return (NULL); 894 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 895 return (bp); 896 897 /* advance the active buf index pointer */ 898 vdp->xdf_i_act = bp->av_forw; 899 } 900 } 901 902 static void 903 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 904 { 905 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 906 v_req_t *vreq = gs->gs_vreq; 907 buf_t *bp = vreq->v_buf; 908 909 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 910 ASSERT(BP_VREQ(bp) == vreq); 911 912 gs_free(gs); 913 914 if (bioerr != 0) 915 bioerror(bp, bioerr); 916 ASSERT(vreq->v_nslots > 0); 917 if (--vreq->v_nslots > 0) 918 return; 919 920 /* remove this IO from our active queue */ 921 xdf_bp_pop(vdp, bp); 922 923 ASSERT(vreq->v_runq); 924 xdf_kstat_exit(vdp, bp); 925 vreq->v_runq = B_FALSE; 926 vreq_free(vdp, vreq); 927 928 if (IS_ERROR(bp)) { 929 xdf_io_err(bp, geterror(bp), 0); 930 } else if (bp->b_resid != 0) { 931 /* Partial transfers are an error */ 932 xdf_io_err(bp, EIO, bp->b_resid); 933 } else { 934 biodone(bp); 935 } 936 } 
937 938 /* 939 * xdf interrupt handler 940 */ 941 static uint_t 942 xdf_intr_locked(xdf_t *vdp) 943 { 944 xendev_ring_t *xbr; 945 blkif_response_t *resp; 946 int bioerr; 947 uint64_t id; 948 uint8_t op; 949 uint16_t status; 950 ddi_acc_handle_t acchdl; 951 952 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 953 954 if ((xbr = vdp->xdf_xb_ring) == NULL) 955 return (DDI_INTR_UNCLAIMED); 956 957 acchdl = vdp->xdf_xb_ring_hdl; 958 959 /* 960 * complete all requests which have a response 961 */ 962 while (resp = xvdi_ring_get_response(xbr)) { 963 id = ddi_get64(acchdl, &resp->id); 964 op = ddi_get8(acchdl, &resp->operation); 965 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 966 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 967 op, id, status)); 968 969 if (status != BLKIF_RSP_OKAY) { 970 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 971 vdp->xdf_addr, 972 (op == BLKIF_OP_READ) ? "reading" : "writing")); 973 bioerr = EIO; 974 } else { 975 bioerr = 0; 976 } 977 978 xdf_io_fini(vdp, id, bioerr); 979 } 980 return (DDI_INTR_CLAIMED); 981 } 982 983 /* 984 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 985 * block at a lower pil. 
 */
static uint_t
xdf_intr(caddr_t arg)
{
	xdf_t *vdp = (xdf_t *)arg;
	int rv;

	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_intr_locked(vdp);
	mutex_exit(&vdp->xdf_dev_lk);

	/* restart IO processing, except while polled IO is in effect */
	if (!do_polled_io)
		xdf_io_start(vdp);

	return (rv);
}

/*
 * Push any queued requests on the ring to the backend and, if we have an
 * event channel, notify the other end.  A no-op if there is no ring.
 * The caller must hold xdf_dev_lk.
 */
static void
xdf_ring_push(xdf_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_xb_ring == NULL)
		return;

	if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
		DPRINTF(IO_DBG, (
		    "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
		    vdp->xdf_addr));
	}

	if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
		xvdi_notify_oe(vdp->xdf_dip);
}

/*
 * Poll the ring until every outstanding request has completed, giving up
 * after XDF_DRAIN_RETRY_COUNT attempts.  Each attempt processes any
 * available responses, re-pushes the ring, and then sleeps with
 * xdf_dev_lk dropped.
 *
 * The caller must hold xdf_dev_lk; it is held again on return.  Returns 0
 * if the ring is gone or fully drained, or EIO if it still has incomplete
 * requests or unconsumed responses.
 */
static int
xdf_ring_drain_locked(xdf_t *vdp)
{
	int		pollc, rv = 0;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf_ring_drain: start\n");

	for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
		/* re-checked every pass since the lock is dropped below */
		if (vdp->xdf_xb_ring == NULL)
			goto out;

		if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
			(void) xdf_intr_locked(vdp);
		if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
			goto out;
		xdf_ring_push(vdp);

		/* file-backed devices can be slow */
		mutex_exit(&vdp->xdf_dev_lk);
#ifdef XPV_HVM_DRIVER
		(void) HYPERVISOR_yield();
#endif /* XPV_HVM_DRIVER */
		delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
		mutex_enter(&vdp->xdf_dev_lk);
	}
	cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);

out:
	if (vdp->xdf_xb_ring != NULL) {
		if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
		    xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
			rv = EIO;
	}
	if (xdf_debug & SUSRES_DBG)
		xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
		    vdp->xdf_addr, rv);
	return (rv);
}

/*
 * Wrapper around xdf_ring_drain_locked() for callers that don't already
 * hold xdf_dev_lk.
 */
static int
xdf_ring_drain(xdf_t *vdp)
{
	int rv;
	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_ring_drain_locked(vdp);
	mutex_exit(&vdp->xdf_dev_lk);
	return (rv);
}

/*
 * Destroy all v_req_t, grant table entries, and our ring buffer.
 *
 * The caller must hold both xdf_cb_lk and xdf_dev_lk.  Nothing is done
 * unless the device is in the XD_INIT, XD_CONNECTED or XD_READY state.
 * Bufs that were in flight stay on the active queue with their vreq
 * detached so that they can be re-issued later.
 */
static void
xdf_ring_destroy(xdf_t *vdp)
{
	v_req_t		*vreq;
	buf_t		*bp;
	ge_slot_t	*gs;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if ((vdp->xdf_state != XD_INIT) &&
	    (vdp->xdf_state != XD_CONNECTED) &&
	    (vdp->xdf_state != XD_READY)) {
		ASSERT(vdp->xdf_xb_ring == NULL);
		ASSERT(vdp->xdf_xb_ring_hdl == NULL);
		ASSERT(vdp->xdf_peer == INVALID_DOMID);
		ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
		ASSERT(list_is_empty(&vdp->xdf_vreq_act));
		return;
	}

	/*
	 * We don't want to receive async notifications from the backend
	 * when it finishes processing ring entries.
	 */
#ifdef XPV_HVM_DRIVER
	ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */

	/*
	 * Drain any requests in the ring.  We need to do this before we
	 * can free grant table entries, because if active ring entries
	 * point to grants, then the backend could be trying to access
	 * those grants.
	 */
	(void) xdf_ring_drain_locked(vdp);

	/* We're done talking to the backend so free up our event channel */
	xvdi_free_evtchn(vdp->xdf_dip);
	vdp->xdf_evtchn = INVALID_EVTCHN;

	while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
		bp = vreq->v_buf;
		ASSERT(BP_VREQ(bp) == vreq);

		/* Free up any grant table entries associated with this IO */
		while ((gs = list_head(&vreq->v_gs)) != NULL)
			gs_free(gs);

		/* If this IO was on the runq, move it back to the waitq. */
		if (vreq->v_runq)
			xdf_kstat_runq_to_waitq(vdp, bp);

		/*
		 * Reset any buf IO state since we're going to re-issue the
		 * IO when we reconnect.
		 */
		vreq_free(vdp, vreq);
		BP_VREQ_SET(bp, NULL);
		bioerror(bp, 0);
	}

	/* reset the active queue index pointer */
	vdp->xdf_i_act = vdp->xdf_f_act;

	/* Destroy the ring */
	xvdi_free_ring(vdp->xdf_xb_ring);
	vdp->xdf_xb_ring = NULL;
	vdp->xdf_xb_ring_hdl = NULL;
	vdp->xdf_peer = INVALID_DOMID;
}

/*
 * Clamp the transfer size of a buf to xdf_maxphys.
 */
void
xdfmin(struct buf *bp)
{
	if (bp->b_bcount > xdf_maxphys)
		bp->b_bcount = xdf_maxphys;
}

/*
 * Check if we have a pending "eject" media request.
 *
 * Returns B_TRUE only if media requests are supported and the XBP_MEDIA_REQ
 * property read from xenbus equals XBV_MEDIA_REQ_EJECT; returns B_FALSE
 * otherwise, including when the property can't be read.
 */
static int
xdf_eject_pending(xdf_t *vdp)
{
	dev_info_t	*dip = vdp->xdf_dip;
	char		*xsname, *str;

	if (!vdp->xdf_media_req_supported)
		return (B_FALSE);

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
		return (B_FALSE);

	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
		strfree(str);
		return (B_FALSE);
	}
	strfree(str);
	return (B_TRUE);
}

/*
 * Generate a media request.
 *
 * Writes req to the XBP_MEDIA_REQ xenbus property.  Returns 0 on success,
 * ENOTTY if the device is not a CD or doesn't support media requests,
 * ENXIO if there is no xenbus name, an eject is already pending, or media
 * is required but absent, and EIO if the device isn't in the XD_READY
 * state or the xenbus write fails.
 */
static int
xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
{
	dev_info_t	*dip = vdp->xdf_dip;
	char		*xsname;

	/*
	 * we can't be holding xdf_dev_lk because xenbus_printf() can
	 * block while waiting for a PIL 1 interrupt message.  this
	 * would cause a deadlock with xdf_intr() which needs to grab
	 * xdf_dev_lk as well and runs at PIL 5.
	 */
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));

	if ((xsname = xvdi_get_xsname(dip)) == NULL)
		return (ENXIO);

	/* Check if we support media requests */
	if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
		return (ENOTTY);

	/* If an eject is pending then don't allow any new requests */
	if (xdf_eject_pending(vdp))
		return (ENXIO);

	/* Make sure that there is media present */
	if (media_required && (vdp->xdf_xdev_nblocks == 0))
		return (ENXIO);

	/* We only allow operations when the device is ready and connected */
	if (vdp->xdf_state != XD_READY)
		return (EIO);

	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
		return (EIO);

	return (0);
}

/*
 * populate a single blkif_request_t w/ a buf
 */
static void
xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
	grant_ref_t	gr;
	uint8_t		fsect, lsect;
	size_t		bcnt;
	paddr_t		dma_addr;
	off_t		blk_off;
	dev_info_t	*dip = vdp->xdf_dip;
	blkif_vdev_t	vdev = xvdi_get_vdevnum(dip);
	v_req_t		*vreq = BP_VREQ(bp);
	uint64_t	blkno = vreq->v_blkno;
	uint_t		ndmacs = vreq->v_ndmacs;
	ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
	int		seg = 0;
	int		isread = IS_READ(bp);
	ge_slot_t	*gs = list_head(&vreq->v_gs);

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(vreq->v_status == VREQ_GS_ALLOCED);

	if (isread)
		ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
	else {
		switch (vreq->v_flush_diskcache) {
		case FLUSH_DISKCACHE:
			ddi_put8(acchdl, &rreq->operation,
			    BLKIF_OP_FLUSH_DISKCACHE);
			ddi_put16(acchdl, &rreq->handle, vdev);
			ddi_put64(acchdl, &rreq->id,
			    (uint64_t)(uintptr_t)(gs));
			ddi_put8(acchdl, &rreq->nr_segments, 0);
			vreq->v_status = VREQ_DMAWIN_DONE;
			return;
		case
WRITE_BARRIER: 1262 ddi_put8(acchdl, &rreq->operation, 1263 BLKIF_OP_WRITE_BARRIER); 1264 break; 1265 default: 1266 if (!vdp->xdf_wce) 1267 ddi_put8(acchdl, &rreq->operation, 1268 BLKIF_OP_WRITE_BARRIER); 1269 else 1270 ddi_put8(acchdl, &rreq->operation, 1271 BLKIF_OP_WRITE); 1272 break; 1273 } 1274 } 1275 1276 ddi_put16(acchdl, &rreq->handle, vdev); 1277 ddi_put64(acchdl, &rreq->sector_number, blkno); 1278 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1279 1280 /* 1281 * loop until all segments are populated or no more dma cookie in buf 1282 */ 1283 for (;;) { 1284 /* 1285 * Each segment of a blkif request can transfer up to 1286 * one 4K page of data. 1287 */ 1288 bcnt = vreq->v_dmac.dmac_size; 1289 dma_addr = vreq->v_dmac.dmac_laddress; 1290 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1291 fsect = blk_off >> XB_BSHIFT; 1292 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1293 1294 ASSERT(bcnt <= PAGESIZE); 1295 ASSERT((bcnt % XB_BSIZE) == 0); 1296 ASSERT((blk_off & XB_BMASK) == 0); 1297 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1298 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1299 1300 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1301 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1302 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1303 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1304 1305 DPRINTF(IO_DBG, ( 1306 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1307 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1308 DPRINTF(IO_DBG, ( 1309 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1310 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1311 1312 blkno += (bcnt >> XB_BSHIFT); 1313 seg++; 1314 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1315 if (--ndmacs) { 1316 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1317 continue; 1318 } 1319 1320 vreq->v_status = VREQ_DMAWIN_DONE; 1321 vreq->v_blkno = blkno; 1322 break; 1323 } 1324 ddi_put8(acchdl, &rreq->nr_segments, seg); 1325 DPRINTF(IO_DBG, ( 1326 "xdf@%s: xdf_process_rreq: 
request id=%"PRIx64" ready\n", 1327 vdp->xdf_addr, rreq->id)); 1328 } 1329 1330 static void 1331 xdf_io_start(xdf_t *vdp) 1332 { 1333 struct buf *bp; 1334 v_req_t *vreq; 1335 blkif_request_t *rreq; 1336 boolean_t rreqready = B_FALSE; 1337 1338 mutex_enter(&vdp->xdf_dev_lk); 1339 1340 /* 1341 * Populate the ring request(s). Loop until there is no buf to 1342 * transfer or no free slot available in I/O ring. 1343 */ 1344 for (;;) { 1345 /* don't start any new IO if we're suspending */ 1346 if (vdp->xdf_suspending) 1347 break; 1348 if ((bp = xdf_bp_next(vdp)) == NULL) 1349 break; 1350 1351 /* if the buf doesn't already have a vreq, allocate one */ 1352 if (((vreq = BP_VREQ(bp)) == NULL) && 1353 ((vreq = vreq_get(vdp, bp)) == NULL)) 1354 break; 1355 1356 /* alloc DMA/GTE resources */ 1357 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1358 break; 1359 1360 /* get next blkif_request in the ring */ 1361 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1362 break; 1363 bzero(rreq, sizeof (blkif_request_t)); 1364 rreqready = B_TRUE; 1365 1366 /* populate blkif_request with this buf */ 1367 xdf_process_rreq(vdp, bp, rreq); 1368 1369 /* 1370 * This buffer/vreq pair is has been allocated a ring buffer 1371 * resources, so if it isn't already in our runq, add it. 
1372 */ 1373 if (!vreq->v_runq) 1374 xdf_kstat_waitq_to_runq(vdp, bp); 1375 } 1376 1377 /* Send the request(s) to the backend */ 1378 if (rreqready) 1379 xdf_ring_push(vdp); 1380 1381 mutex_exit(&vdp->xdf_dev_lk); 1382 } 1383 1384 1385 /* check if partition is open, -1 - check all partitions on the disk */ 1386 static boolean_t 1387 xdf_isopen(xdf_t *vdp, int partition) 1388 { 1389 int i; 1390 ulong_t parbit; 1391 boolean_t rval = B_FALSE; 1392 1393 ASSERT((partition == -1) || 1394 ((partition >= 0) || (partition < XDF_PEXT))); 1395 1396 if (partition == -1) 1397 parbit = (ulong_t)-1; 1398 else 1399 parbit = 1 << partition; 1400 1401 for (i = 0; i < OTYPCNT; i++) { 1402 if (vdp->xdf_vd_open[i] & parbit) 1403 rval = B_TRUE; 1404 } 1405 1406 return (rval); 1407 } 1408 1409 /* 1410 * The connection should never be closed as long as someone is holding 1411 * us open, there is pending IO, or someone is waiting waiting for a 1412 * connection. 1413 */ 1414 static boolean_t 1415 xdf_busy(xdf_t *vdp) 1416 { 1417 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1418 1419 if ((vdp->xdf_xb_ring != NULL) && 1420 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1421 ASSERT(vdp->xdf_state != XD_CLOSED); 1422 return (B_TRUE); 1423 } 1424 1425 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1426 ASSERT(vdp->xdf_state != XD_CLOSED); 1427 return (B_TRUE); 1428 } 1429 1430 if (xdf_isopen(vdp, -1)) { 1431 ASSERT(vdp->xdf_state != XD_CLOSED); 1432 return (B_TRUE); 1433 } 1434 1435 if (vdp->xdf_connect_req > 0) { 1436 ASSERT(vdp->xdf_state != XD_CLOSED); 1437 return (B_TRUE); 1438 } 1439 1440 return (B_FALSE); 1441 } 1442 1443 static void 1444 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1445 { 1446 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1447 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1448 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1449 vdp->xdf_addr, vdp->xdf_state, new_state)); 1450 vdp->xdf_state = new_state; 1451 cv_broadcast(&vdp->xdf_dev_cv); 1452 } 1453 
1454 static void 1455 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1456 { 1457 dev_info_t *dip = vdp->xdf_dip; 1458 boolean_t busy; 1459 1460 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1461 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1462 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1463 1464 /* Check if we're already there. */ 1465 if (vdp->xdf_state == new_state) 1466 return; 1467 1468 mutex_enter(&vdp->xdf_dev_lk); 1469 busy = xdf_busy(vdp); 1470 1471 /* If we're already closed then there's nothing todo. */ 1472 if (vdp->xdf_state == XD_CLOSED) { 1473 ASSERT(!busy); 1474 xdf_set_state(vdp, new_state); 1475 mutex_exit(&vdp->xdf_dev_lk); 1476 return; 1477 } 1478 1479 #ifdef DEBUG 1480 /* UhOh. Warn the user that something bad has happened. */ 1481 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1482 (vdp->xdf_xdev_nblocks != 0)) { 1483 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1484 vdp->xdf_addr); 1485 } 1486 #endif /* DEBUG */ 1487 1488 xdf_ring_destroy(vdp); 1489 1490 /* If we're busy then we can only go into the unknown state */ 1491 xdf_set_state(vdp, (busy) ? 
XD_UNKNOWN : new_state); 1492 mutex_exit(&vdp->xdf_dev_lk); 1493 1494 /* if we're closed now, let the other end know */ 1495 if (vdp->xdf_state == XD_CLOSED) 1496 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1497 } 1498 1499 1500 /* 1501 * Kick-off connect process 1502 * Status should be XD_UNKNOWN or XD_CLOSED 1503 * On success, status will be changed to XD_INIT 1504 * On error, it will be changed to XD_UNKNOWN 1505 */ 1506 static int 1507 xdf_setstate_init(xdf_t *vdp) 1508 { 1509 dev_info_t *dip = vdp->xdf_dip; 1510 xenbus_transaction_t xbt; 1511 grant_ref_t gref; 1512 char *xsname, *str; 1513 int rv; 1514 1515 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1516 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1517 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1518 (vdp->xdf_state == XD_CLOSED)); 1519 1520 DPRINTF(DDI_DBG, 1521 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1522 1523 /* 1524 * If an eject is pending then don't allow a new connection. 1525 * (Only the backend can clear media request eject request.) 1526 */ 1527 if (xdf_eject_pending(vdp)) 1528 return (DDI_FAILURE); 1529 1530 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1531 goto errout; 1532 1533 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1534 goto errout; 1535 1536 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1537 1538 /* 1539 * Sanity check for the existance of the xenbus device-type property. 1540 * This property might not exist if we our xenbus device nodes was 1541 * force destroyed while we were still connected to the backend. 
1542 */ 1543 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1544 goto errout; 1545 strfree(str); 1546 1547 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1548 goto errout; 1549 1550 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1551 #ifdef XPV_HVM_DRIVER 1552 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1553 #else /* !XPV_HVM_DRIVER */ 1554 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1555 DDI_SUCCESS) { 1556 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1557 "failed to add intr handler", vdp->xdf_addr); 1558 goto errout1; 1559 } 1560 #endif /* !XPV_HVM_DRIVER */ 1561 1562 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1563 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1564 DDI_SUCCESS) { 1565 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1566 vdp->xdf_addr); 1567 goto errout2; 1568 } 1569 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1570 1571 /* 1572 * Write into xenstore the info needed by backend 1573 */ 1574 trans_retry: 1575 if (xenbus_transaction_start(&xbt)) { 1576 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1577 vdp->xdf_addr); 1578 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1579 goto fail_trans; 1580 } 1581 1582 /* 1583 * XBP_PROTOCOL is written by the domain builder in the case of PV 1584 * domains. However, it is not written for HVM domains, so let's 1585 * write it here. 
1586 */ 1587 if (((rv = xenbus_printf(xbt, xsname, 1588 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1589 ((rv = xenbus_printf(xbt, xsname, 1590 XBP_RING_REF, "%u", gref)) != 0) || 1591 ((rv = xenbus_printf(xbt, xsname, 1592 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1593 ((rv = xenbus_printf(xbt, xsname, 1594 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1595 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1596 (void) xenbus_transaction_end(xbt, 1); 1597 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1598 goto fail_trans; 1599 } 1600 1601 /* kick-off connect process */ 1602 if (rv = xenbus_transaction_end(xbt, 0)) { 1603 if (rv == EAGAIN) 1604 goto trans_retry; 1605 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1606 goto fail_trans; 1607 } 1608 1609 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1610 mutex_enter(&vdp->xdf_dev_lk); 1611 xdf_set_state(vdp, XD_INIT); 1612 mutex_exit(&vdp->xdf_dev_lk); 1613 1614 return (DDI_SUCCESS); 1615 1616 fail_trans: 1617 xvdi_free_ring(vdp->xdf_xb_ring); 1618 errout2: 1619 #ifdef XPV_HVM_DRIVER 1620 ec_unbind_evtchn(vdp->xdf_evtchn); 1621 #else /* !XPV_HVM_DRIVER */ 1622 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1623 #endif /* !XPV_HVM_DRIVER */ 1624 errout1: 1625 xvdi_free_evtchn(dip); 1626 vdp->xdf_evtchn = INVALID_EVTCHN; 1627 errout: 1628 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1629 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1630 vdp->xdf_addr); 1631 return (DDI_FAILURE); 1632 } 1633 1634 int 1635 xdf_get_flush_block(xdf_t *vdp) 1636 { 1637 /* 1638 * Get a DEV_BSIZE aligned bufer 1639 */ 1640 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1641 vdp->xdf_cache_flush_block = 1642 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1643 (int)vdp->xdf_xdev_secsize); 1644 1645 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1646 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1647 return (DDI_FAILURE); 1648 
return (DDI_SUCCESS); 1649 } 1650 1651 static void 1652 xdf_setstate_ready(void *arg) 1653 { 1654 xdf_t *vdp = (xdf_t *)arg; 1655 1656 vdp->xdf_ready_tq_thread = curthread; 1657 1658 /* 1659 * We've created all the minor nodes via cmlb_attach() using default 1660 * value in xdf_attach() to make it possible to block in xdf_open(), 1661 * in case there's anyone (say, booting thread) ever trying to open 1662 * it before connected to backend. We will refresh all those minor 1663 * nodes w/ latest info we've got now when we are almost connected. 1664 */ 1665 mutex_enter(&vdp->xdf_dev_lk); 1666 if (vdp->xdf_cmbl_reattach) { 1667 vdp->xdf_cmbl_reattach = B_FALSE; 1668 1669 mutex_exit(&vdp->xdf_dev_lk); 1670 if (xdf_cmlb_attach(vdp) != 0) { 1671 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1672 return; 1673 } 1674 mutex_enter(&vdp->xdf_dev_lk); 1675 } 1676 1677 /* If we're not still trying to get to the ready state, then bail. */ 1678 if (vdp->xdf_state != XD_CONNECTED) { 1679 mutex_exit(&vdp->xdf_dev_lk); 1680 return; 1681 } 1682 mutex_exit(&vdp->xdf_dev_lk); 1683 1684 /* 1685 * If backend has feature-barrier, see if it supports disk 1686 * cache flush op. 1687 */ 1688 vdp->xdf_flush_supported = B_FALSE; 1689 if (vdp->xdf_feature_barrier) { 1690 /* 1691 * Pretend we already know flush is supported so probe 1692 * will attempt the correct op. 1693 */ 1694 vdp->xdf_flush_supported = B_TRUE; 1695 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1696 vdp->xdf_flush_supported = B_TRUE; 1697 } else { 1698 vdp->xdf_flush_supported = B_FALSE; 1699 /* 1700 * If the other end does not support the cache flush op 1701 * then we must use a barrier-write to force disk 1702 * cache flushing. Barrier writes require that a data 1703 * block actually be written. 1704 * Cache a block to barrier-write when we are 1705 * asked to perform a flush. 1706 * XXX - would it be better to just copy 1 block 1707 * (512 bytes) from whatever write we did last 1708 * and rewrite that block? 
1709 */ 1710 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1711 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1712 return; 1713 } 1714 } 1715 } 1716 1717 mutex_enter(&vdp->xdf_cb_lk); 1718 mutex_enter(&vdp->xdf_dev_lk); 1719 if (vdp->xdf_state == XD_CONNECTED) 1720 xdf_set_state(vdp, XD_READY); 1721 mutex_exit(&vdp->xdf_dev_lk); 1722 1723 /* Restart any currently queued up io */ 1724 xdf_io_start(vdp); 1725 1726 mutex_exit(&vdp->xdf_cb_lk); 1727 } 1728 1729 /* 1730 * synthetic geometry 1731 */ 1732 #define XDF_NSECTS 256 1733 #define XDF_NHEADS 16 1734 1735 static void 1736 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1737 { 1738 xdf_t *vdp; 1739 uint_t ncyl; 1740 1741 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1742 1743 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1744 1745 bzero(geomp, sizeof (*geomp)); 1746 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1747 geomp->g_acyl = 0; 1748 geomp->g_nhead = XDF_NHEADS; 1749 geomp->g_nsect = XDF_NSECTS; 1750 geomp->g_secsize = vdp->xdf_xdev_secsize; 1751 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1752 geomp->g_intrlv = 0; 1753 geomp->g_rpm = 7200; 1754 } 1755 1756 /* 1757 * Finish other initialization after we've connected to backend 1758 * Status should be XD_INIT before calling this routine 1759 * On success, status should be changed to XD_CONNECTED. 
1760 * On error, status should stay XD_INIT 1761 */ 1762 static int 1763 xdf_setstate_connected(xdf_t *vdp) 1764 { 1765 dev_info_t *dip = vdp->xdf_dip; 1766 cmlb_geom_t pgeom; 1767 diskaddr_t nblocks = 0; 1768 uint_t secsize = 0; 1769 char *oename, *xsname, *str; 1770 uint_t dinfo; 1771 1772 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1773 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1774 ASSERT(vdp->xdf_state == XD_INIT); 1775 1776 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1777 ((oename = xvdi_get_oename(dip)) == NULL)) 1778 return (DDI_FAILURE); 1779 1780 /* Make sure the other end is XenbusStateConnected */ 1781 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1782 return (DDI_FAILURE); 1783 1784 /* Determine if feature barrier is supported by backend */ 1785 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1786 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1787 vdp->xdf_addr); 1788 1789 /* 1790 * Probe backend. Read the device size into xdf_xdev_nblocks 1791 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1792 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1793 * we always set VDISK_CDROM, regardless of if it's present in 1794 * the xenbus info parameter. 
1795 */ 1796 if (xenbus_gather(XBT_NULL, oename, 1797 XBP_SECTORS, "%"SCNu64, &nblocks, 1798 XBP_SECTOR_SIZE, "%u", &secsize, 1799 XBP_INFO, "%u", &dinfo, 1800 NULL) != 0) { 1801 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1802 "cannot read backend info", vdp->xdf_addr); 1803 return (DDI_FAILURE); 1804 } 1805 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1806 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1807 vdp->xdf_addr); 1808 return (DDI_FAILURE); 1809 } 1810 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1811 dinfo |= VDISK_CDROM; 1812 strfree(str); 1813 1814 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1815 secsize = DEV_BSIZE; 1816 vdp->xdf_xdev_nblocks = nblocks; 1817 vdp->xdf_xdev_secsize = secsize; 1818 #ifdef _ILP32 1819 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1820 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1821 "backend disk device too large with %llu blocks for" 1822 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1823 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1824 return (DDI_FAILURE); 1825 } 1826 #endif 1827 1828 /* 1829 * If the physical geometry for a fixed disk has been explicity 1830 * set then make sure that the specified physical geometry isn't 1831 * larger than the device we connected to. 
1832 */ 1833 if (vdp->xdf_pgeom_fixed && 1834 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1835 cmn_err(CE_WARN, 1836 "xdf@%s: connect failed, fixed geometry too large", 1837 vdp->xdf_addr); 1838 return (DDI_FAILURE); 1839 } 1840 1841 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1842 1843 /* mark vbd is ready for I/O */ 1844 mutex_enter(&vdp->xdf_dev_lk); 1845 xdf_set_state(vdp, XD_CONNECTED); 1846 1847 /* check if the cmlb label should be updated */ 1848 xdf_synthetic_pgeom(dip, &pgeom); 1849 if ((vdp->xdf_dinfo != dinfo) || 1850 (!vdp->xdf_pgeom_fixed && 1851 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1852 vdp->xdf_cmbl_reattach = B_TRUE; 1853 1854 vdp->xdf_dinfo = dinfo; 1855 if (!vdp->xdf_pgeom_fixed) 1856 vdp->xdf_pgeom = pgeom; 1857 } 1858 1859 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1860 if (vdp->xdf_xdev_nblocks == 0) { 1861 vdp->xdf_mstate = DKIO_EJECTED; 1862 cv_broadcast(&vdp->xdf_mstate_cv); 1863 } else { 1864 vdp->xdf_mstate = DKIO_INSERTED; 1865 cv_broadcast(&vdp->xdf_mstate_cv); 1866 } 1867 } else { 1868 if (vdp->xdf_mstate != DKIO_NONE) { 1869 vdp->xdf_mstate = DKIO_NONE; 1870 cv_broadcast(&vdp->xdf_mstate_cv); 1871 } 1872 } 1873 1874 mutex_exit(&vdp->xdf_dev_lk); 1875 1876 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1877 (uint64_t)vdp->xdf_xdev_nblocks); 1878 1879 /* Restart any currently queued up io */ 1880 xdf_io_start(vdp); 1881 1882 /* 1883 * To get to the ready state we have to do IO to the backend device, 1884 * but we can't initiate IO from the other end change callback thread 1885 * (which is the current context we're executing in.) This is because 1886 * if the other end disconnects while we're doing IO from the callback 1887 * thread, then we can't recieve that disconnect event and we hang 1888 * waiting for an IO that can never complete. 
1889 */ 1890 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1891 DDI_SLEEP); 1892 1893 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1894 return (DDI_SUCCESS); 1895 } 1896 1897 /*ARGSUSED*/ 1898 static void 1899 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1900 { 1901 XenbusState new_state = *(XenbusState *)impl_data; 1902 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1903 1904 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1905 vdp->xdf_addr, new_state)); 1906 1907 mutex_enter(&vdp->xdf_cb_lk); 1908 1909 /* We assume that this callback is single threaded */ 1910 ASSERT(vdp->xdf_oe_change_thread == NULL); 1911 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1912 1913 /* ignore any backend state changes if we're suspending/suspended */ 1914 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1915 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1916 mutex_exit(&vdp->xdf_cb_lk); 1917 return; 1918 } 1919 1920 switch (new_state) { 1921 case XenbusStateUnknown: 1922 case XenbusStateInitialising: 1923 case XenbusStateInitWait: 1924 case XenbusStateInitialised: 1925 if (vdp->xdf_state == XD_INIT) 1926 break; 1927 1928 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1929 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1930 break; 1931 ASSERT(vdp->xdf_state == XD_INIT); 1932 break; 1933 1934 case XenbusStateConnected: 1935 if ((vdp->xdf_state == XD_CONNECTED) || 1936 (vdp->xdf_state == XD_READY)) 1937 break; 1938 1939 if (vdp->xdf_state != XD_INIT) { 1940 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1941 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1942 break; 1943 ASSERT(vdp->xdf_state == XD_INIT); 1944 } 1945 1946 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1947 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1948 break; 1949 } 1950 ASSERT(vdp->xdf_state == XD_CONNECTED); 1951 break; 1952 1953 case XenbusStateClosing: 1954 if (xdf_isopen(vdp, -1)) { 1955 cmn_err(CE_NOTE, 1956 "xdf@%s: 
hot-unplug failed, still in use", 1957 vdp->xdf_addr); 1958 break; 1959 } 1960 /*FALLTHROUGH*/ 1961 case XenbusStateClosed: 1962 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1963 break; 1964 } 1965 1966 /* notify anybody waiting for oe state change */ 1967 cv_broadcast(&vdp->xdf_dev_cv); 1968 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1969 mutex_exit(&vdp->xdf_cb_lk); 1970 } 1971 1972 static int 1973 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1974 { 1975 int rv, timeouts = 0, reset = 20; 1976 1977 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1978 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1979 1980 /* we can't connect once we're in the closed state */ 1981 if (vdp->xdf_state == XD_CLOSED) 1982 return (XD_CLOSED); 1983 1984 vdp->xdf_connect_req++; 1985 while (vdp->xdf_state != XD_READY) { 1986 mutex_exit(&vdp->xdf_dev_lk); 1987 1988 /* only one thread at a time can be the connection thread */ 1989 if (vdp->xdf_connect_thread == NULL) 1990 vdp->xdf_connect_thread = curthread; 1991 1992 if (vdp->xdf_connect_thread == curthread) { 1993 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 1994 /* 1995 * If we haven't establised a connection 1996 * within the reset time, then disconnect 1997 * so we can try again, and double the reset 1998 * time. The reset time starts at 2 sec. 
1999 */ 2000 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2001 reset *= 2; 2002 } 2003 if (vdp->xdf_state == XD_UNKNOWN) 2004 (void) xdf_setstate_init(vdp); 2005 if (vdp->xdf_state == XD_INIT) 2006 (void) xdf_setstate_connected(vdp); 2007 } 2008 2009 mutex_enter(&vdp->xdf_dev_lk); 2010 if (!wait || (vdp->xdf_state == XD_READY)) 2011 goto out; 2012 2013 mutex_exit((&vdp->xdf_cb_lk)); 2014 if (vdp->xdf_connect_thread != curthread) { 2015 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2016 } else { 2017 /* delay for 0.1 sec */ 2018 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv, 2019 &vdp->xdf_dev_lk, drv_usectohz(100*1000), 2020 TR_CLOCK_TICK); 2021 if (rv == -1) 2022 timeouts++; 2023 } 2024 mutex_exit((&vdp->xdf_dev_lk)); 2025 mutex_enter((&vdp->xdf_cb_lk)); 2026 mutex_enter((&vdp->xdf_dev_lk)); 2027 if (rv == 0) 2028 goto out; 2029 } 2030 2031 out: 2032 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2033 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2034 2035 if (vdp->xdf_connect_thread == curthread) { 2036 /* 2037 * wake up someone else so they can become the connection 2038 * thread. 
2039 */ 2040 cv_signal(&vdp->xdf_dev_cv); 2041 vdp->xdf_connect_thread = NULL; 2042 } 2043 2044 /* Try to lock the media */ 2045 mutex_exit((&vdp->xdf_dev_lk)); 2046 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2047 mutex_enter((&vdp->xdf_dev_lk)); 2048 2049 vdp->xdf_connect_req--; 2050 return (vdp->xdf_state); 2051 } 2052 2053 static uint_t 2054 xdf_iorestart(caddr_t arg) 2055 { 2056 xdf_t *vdp = (xdf_t *)arg; 2057 2058 ASSERT(vdp != NULL); 2059 2060 mutex_enter(&vdp->xdf_dev_lk); 2061 ASSERT(ISDMACBON(vdp)); 2062 SETDMACBOFF(vdp); 2063 mutex_exit(&vdp->xdf_dev_lk); 2064 2065 xdf_io_start(vdp); 2066 2067 return (DDI_INTR_CLAIMED); 2068 } 2069 2070 #if defined(XPV_HVM_DRIVER) 2071 2072 typedef struct xdf_hvm_entry { 2073 list_node_t xdf_he_list; 2074 char *xdf_he_path; 2075 dev_info_t *xdf_he_dip; 2076 } xdf_hvm_entry_t; 2077 2078 static list_t xdf_hvm_list; 2079 static kmutex_t xdf_hvm_list_lock; 2080 2081 static xdf_hvm_entry_t * 2082 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2083 { 2084 xdf_hvm_entry_t *i; 2085 2086 ASSERT((path != NULL) || (dip != NULL)); 2087 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2088 2089 i = list_head(&xdf_hvm_list); 2090 while (i != NULL) { 2091 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2092 i = list_next(&xdf_hvm_list, i); 2093 continue; 2094 } 2095 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2096 i = list_next(&xdf_hvm_list, i); 2097 continue; 2098 } 2099 break; 2100 } 2101 return (i); 2102 } 2103 2104 dev_info_t * 2105 xdf_hvm_hold(const char *path) 2106 { 2107 xdf_hvm_entry_t *i; 2108 dev_info_t *dip; 2109 2110 mutex_enter(&xdf_hvm_list_lock); 2111 i = i_xdf_hvm_find(path, NULL); 2112 if (i == NULL) { 2113 mutex_exit(&xdf_hvm_list_lock); 2114 return (B_FALSE); 2115 } 2116 ndi_hold_devi(dip = i->xdf_he_dip); 2117 mutex_exit(&xdf_hvm_list_lock); 2118 return (dip); 2119 } 2120 2121 static void 2122 xdf_hvm_add(dev_info_t *dip) 2123 { 2124 xdf_hvm_entry_t *i; 2125 char *path; 2126 2127 /* figure out 
the path for the dip */ 2128 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2129 (void) ddi_pathname(dip, path); 2130 2131 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2132 i->xdf_he_dip = dip; 2133 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2134 2135 mutex_enter(&xdf_hvm_list_lock); 2136 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2137 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2138 list_insert_head(&xdf_hvm_list, i); 2139 mutex_exit(&xdf_hvm_list_lock); 2140 2141 kmem_free(path, MAXPATHLEN); 2142 } 2143 2144 static void 2145 xdf_hvm_rm(dev_info_t *dip) 2146 { 2147 xdf_hvm_entry_t *i; 2148 2149 mutex_enter(&xdf_hvm_list_lock); 2150 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2151 list_remove(&xdf_hvm_list, i); 2152 mutex_exit(&xdf_hvm_list_lock); 2153 2154 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2155 kmem_free(i, sizeof (*i)); 2156 } 2157 2158 static void 2159 xdf_hvm_init(void) 2160 { 2161 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2162 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2163 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2164 } 2165 2166 static void 2167 xdf_hvm_fini(void) 2168 { 2169 ASSERT(list_head(&xdf_hvm_list) == NULL); 2170 list_destroy(&xdf_hvm_list); 2171 mutex_destroy(&xdf_hvm_list_lock); 2172 } 2173 2174 boolean_t 2175 xdf_hvm_connect(dev_info_t *dip) 2176 { 2177 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2178 char *oename, *str; 2179 int rv; 2180 2181 mutex_enter(&vdp->xdf_cb_lk); 2182 2183 /* 2184 * Before try to establish a connection we need to wait for the 2185 * backend hotplug scripts to have run. Once they are run the 2186 * "<oename>/hotplug-status" property will be set to "connected". 2187 */ 2188 for (;;) { 2189 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2190 2191 /* 2192 * Get the xenbus path to the backend device. Note that 2193 * we can't cache this path (and we look it up on each pass 2194 * through this loop) because it could change during 2195 * suspend, resume, and migration operations. 
2196 */ 2197 if ((oename = xvdi_get_oename(dip)) == NULL) { 2198 mutex_exit(&vdp->xdf_cb_lk); 2199 return (B_FALSE); 2200 } 2201 2202 str = NULL; 2203 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2204 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2205 break; 2206 2207 if (str != NULL) 2208 strfree(str); 2209 2210 /* wait for an update to "<oename>/hotplug-status" */ 2211 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2212 /* we got interrupted by a signal */ 2213 mutex_exit(&vdp->xdf_cb_lk); 2214 return (B_FALSE); 2215 } 2216 } 2217 2218 /* Good news. The backend hotplug scripts have been run. */ 2219 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2220 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2221 strfree(str); 2222 2223 /* 2224 * If we're emulating a cd device and if the backend doesn't support 2225 * media request opreations, then we're not going to bother trying 2226 * to establish a connection for a couple reasons. First off, media 2227 * requests support is required to support operations like eject and 2228 * media locking. Second, other backend platforms like Linux don't 2229 * support hvm pv cdrom access. They don't even have a backend pv 2230 * driver for cdrom device nodes, so we don't want to block forever 2231 * waiting for a connection to a backend driver that doesn't exist. 2232 */ 2233 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2234 mutex_exit(&vdp->xdf_cb_lk); 2235 return (B_FALSE); 2236 } 2237 2238 mutex_enter(&vdp->xdf_dev_lk); 2239 rv = xdf_connect_locked(vdp, B_TRUE); 2240 mutex_exit(&vdp->xdf_dev_lk); 2241 mutex_exit(&vdp->xdf_cb_lk); 2242 2243 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2244 } 2245 2246 int 2247 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2248 { 2249 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2250 2251 /* sanity check the requested physical geometry */ 2252 mutex_enter(&vdp->xdf_dev_lk); 2253 if ((geomp->g_secsize != XB_BSIZE) || 2254 (geomp->g_capacity == 0)) { 2255 mutex_exit(&vdp->xdf_dev_lk); 2256 return (EINVAL); 2257 } 2258 2259 /* 2260 * If we've already connected to the backend device then make sure 2261 * we're not defining a physical geometry larger than our backend 2262 * device. 2263 */ 2264 if ((vdp->xdf_xdev_nblocks != 0) && 2265 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2266 mutex_exit(&vdp->xdf_dev_lk); 2267 return (EINVAL); 2268 } 2269 2270 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2271 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2272 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2273 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2274 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2275 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2276 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2277 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2278 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2279 2280 vdp->xdf_pgeom_fixed = B_TRUE; 2281 mutex_exit(&vdp->xdf_dev_lk); 2282 2283 /* force a re-validation */ 2284 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2285 2286 return (0); 2287 } 2288 2289 boolean_t 2290 xdf_is_cd(dev_info_t *dip) 2291 { 2292 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2293 boolean_t rv; 2294 2295 mutex_enter(&vdp->xdf_cb_lk); 2296 rv = XD_IS_CD(vdp); 2297 mutex_exit(&vdp->xdf_cb_lk); 2298 return (rv); 2299 } 2300 2301 boolean_t 2302 xdf_is_rm(dev_info_t *dip) 2303 { 2304 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2305 boolean_t rv; 2306 2307 mutex_enter(&vdp->xdf_cb_lk); 2308 rv = XD_IS_RM(vdp); 2309 mutex_exit(&vdp->xdf_cb_lk); 2310 return (rv); 2311 } 2312 2313 boolean_t 2314 xdf_media_req_supported(dev_info_t *dip) 2315 { 2316 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 
2317 boolean_t rv; 2318 2319 mutex_enter(&vdp->xdf_cb_lk); 2320 rv = vdp->xdf_media_req_supported; 2321 mutex_exit(&vdp->xdf_cb_lk); 2322 return (rv); 2323 } 2324 2325 #endif /* XPV_HVM_DRIVER */ 2326 2327 static int 2328 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2329 { 2330 xdf_t *vdp; 2331 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2332 2333 if (vdp == NULL) 2334 return (ENXIO); 2335 2336 mutex_enter(&vdp->xdf_dev_lk); 2337 *capp = vdp->xdf_pgeom.g_capacity; 2338 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2339 mutex_exit(&vdp->xdf_dev_lk); 2340 return (0); 2341 } 2342 2343 static int 2344 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2345 { 2346 xdf_t *vdp; 2347 2348 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2349 return (ENXIO); 2350 *geomp = vdp->xdf_pgeom; 2351 return (0); 2352 } 2353 2354 /* 2355 * No real HBA, no geometry available from it 2356 */ 2357 /*ARGSUSED*/ 2358 static int 2359 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2360 { 2361 return (EINVAL); 2362 } 2363 2364 static int 2365 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2366 { 2367 xdf_t *vdp; 2368 2369 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2370 return (ENXIO); 2371 2372 if (XD_IS_RO(vdp)) 2373 tgattributep->media_is_writable = 0; 2374 else 2375 tgattributep->media_is_writable = 1; 2376 return (0); 2377 } 2378 2379 /* ARGSUSED3 */ 2380 int 2381 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2382 { 2383 int instance; 2384 xdf_t *vdp; 2385 2386 instance = ddi_get_instance(dip); 2387 2388 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 2389 return (ENXIO); 2390 2391 switch (cmd) { 2392 case TG_GETPHYGEOM: 2393 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2394 case TG_GETVIRTGEOM: 2395 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2396 case TG_GETCAPACITY: 2397 return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2398 
case TG_GETBLOCKSIZE: 2399 mutex_enter(&vdp->xdf_cb_lk); 2400 *(uint32_t *)arg = vdp->xdf_xdev_secsize; 2401 mutex_exit(&vdp->xdf_cb_lk); 2402 return (0); 2403 case TG_GETATTR: 2404 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2405 default: 2406 return (ENOTTY); 2407 } 2408 } 2409 2410 /* ARGSUSED5 */ 2411 int 2412 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2413 diskaddr_t start, size_t reqlen, void *tg_cookie) 2414 { 2415 xdf_t *vdp; 2416 struct buf *bp; 2417 int err = 0; 2418 2419 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2420 2421 /* We don't allow IO from the oe_change callback thread */ 2422 ASSERT(curthread != vdp->xdf_oe_change_thread); 2423 2424 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) 2425 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2426 return (EINVAL); 2427 2428 bp = getrbuf(KM_SLEEP); 2429 if (cmd == TG_READ) 2430 bp->b_flags = B_BUSY | B_READ; 2431 else 2432 bp->b_flags = B_BUSY | B_WRITE; 2433 2434 bp->b_un.b_addr = bufp; 2435 bp->b_bcount = reqlen; 2436 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); 2437 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2438 2439 mutex_enter(&vdp->xdf_dev_lk); 2440 xdf_bp_push(vdp, bp); 2441 mutex_exit(&vdp->xdf_dev_lk); 2442 xdf_io_start(vdp); 2443 if (curthread == vdp->xdf_ready_tq_thread) 2444 (void) xdf_ring_drain(vdp); 2445 err = biowait(bp); 2446 ASSERT(bp->b_flags & B_DONE); 2447 freerbuf(bp); 2448 return (err); 2449 } 2450 2451 /* 2452 * Lock the current media. Set the media state to "lock". 2453 * (Media locks are only respected by the backend driver.) 2454 */ 2455 static int 2456 xdf_ioctl_mlock(xdf_t *vdp) 2457 { 2458 int rv; 2459 mutex_enter(&vdp->xdf_cb_lk); 2460 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2461 mutex_exit(&vdp->xdf_cb_lk); 2462 return (rv); 2463 } 2464 2465 /* 2466 * Release a media lock. Set the media state to "none". 
2467 */ 2468 static int 2469 xdf_ioctl_munlock(xdf_t *vdp) 2470 { 2471 int rv; 2472 mutex_enter(&vdp->xdf_cb_lk); 2473 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2474 mutex_exit(&vdp->xdf_cb_lk); 2475 return (rv); 2476 } 2477 2478 /* 2479 * Eject the current media. Ignores any media locks. (Media locks 2480 * are only for benifit of the the backend.) 2481 */ 2482 static int 2483 xdf_ioctl_eject(xdf_t *vdp) 2484 { 2485 int rv; 2486 2487 mutex_enter(&vdp->xdf_cb_lk); 2488 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2489 mutex_exit(&vdp->xdf_cb_lk); 2490 return (rv); 2491 } 2492 2493 /* 2494 * We've set the media requests xenbus parameter to eject, so now 2495 * disconnect from the backend, wait for the backend to clear 2496 * the media requets xenbus paramter, and then we can reconnect 2497 * to the backend. 2498 */ 2499 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2500 mutex_enter(&vdp->xdf_dev_lk); 2501 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2502 mutex_exit(&vdp->xdf_dev_lk); 2503 mutex_exit(&vdp->xdf_cb_lk); 2504 return (EIO); 2505 } 2506 mutex_exit(&vdp->xdf_dev_lk); 2507 mutex_exit(&vdp->xdf_cb_lk); 2508 return (0); 2509 } 2510 2511 /* 2512 * Watch for media state changes. This can be an insertion of a device 2513 * (triggered by a 'xm block-configure' request in another domain) or 2514 * the ejection of a device (triggered by a local "eject" operation). 2515 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 
2516 */ 2517 static int 2518 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2519 { 2520 enum dkio_state prev_state; 2521 2522 mutex_enter(&vdp->xdf_cb_lk); 2523 prev_state = vdp->xdf_mstate; 2524 2525 if (vdp->xdf_mstate == mstate) { 2526 while (vdp->xdf_mstate == prev_state) { 2527 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2528 &vdp->xdf_cb_lk) == 0) { 2529 mutex_exit(&vdp->xdf_cb_lk); 2530 return (EINTR); 2531 } 2532 } 2533 } 2534 2535 if ((prev_state != DKIO_INSERTED) && 2536 (vdp->xdf_mstate == DKIO_INSERTED)) { 2537 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2538 mutex_exit(&vdp->xdf_cb_lk); 2539 return (0); 2540 } 2541 2542 mutex_exit(&vdp->xdf_cb_lk); 2543 return (0); 2544 } 2545 2546 /*ARGSUSED*/ 2547 static int 2548 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2549 int *rvalp) 2550 { 2551 minor_t minor = getminor(dev); 2552 int part = XDF_PART(minor); 2553 xdf_t *vdp; 2554 int rv; 2555 2556 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2557 (!xdf_isopen(vdp, part))) 2558 return (ENXIO); 2559 2560 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2561 vdp->xdf_addr, cmd, cmd)); 2562 2563 switch (cmd) { 2564 default: 2565 return (ENOTTY); 2566 case DKIOCG_PHYGEOM: 2567 case DKIOCG_VIRTGEOM: 2568 case DKIOCGGEOM: 2569 case DKIOCSGEOM: 2570 case DKIOCGAPART: 2571 case DKIOCSAPART: 2572 case DKIOCGVTOC: 2573 case DKIOCSVTOC: 2574 case DKIOCPARTINFO: 2575 case DKIOCGEXTVTOC: 2576 case DKIOCSEXTVTOC: 2577 case DKIOCEXTPARTINFO: 2578 case DKIOCGMBOOT: 2579 case DKIOCSMBOOT: 2580 case DKIOCGETEFI: 2581 case DKIOCSETEFI: 2582 case DKIOCSETEXTPART: 2583 case DKIOCPARTITION: 2584 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2585 rvalp, NULL)); 2586 case FDEJECT: 2587 case DKIOCEJECT: 2588 case CDROMEJECT: 2589 return (xdf_ioctl_eject(vdp)); 2590 case DKIOCLOCK: 2591 return (xdf_ioctl_mlock(vdp)); 2592 case DKIOCUNLOCK: 2593 return (xdf_ioctl_munlock(vdp)); 2594 case CDROMREADOFFSET: { 2595 
int offset = 0; 2596 if (!XD_IS_CD(vdp)) 2597 return (ENOTTY); 2598 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2599 return (EFAULT); 2600 return (0); 2601 } 2602 case DKIOCGMEDIAINFO: { 2603 struct dk_minfo media_info; 2604 2605 media_info.dki_lbsize = vdp->xdf_xdev_secsize; 2606 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2607 if (XD_IS_CD(vdp)) 2608 media_info.dki_media_type = DK_CDROM; 2609 else 2610 media_info.dki_media_type = DK_FIXED_DISK; 2611 2612 if (ddi_copyout(&media_info, (void *)arg, 2613 sizeof (struct dk_minfo), mode)) 2614 return (EFAULT); 2615 return (0); 2616 } 2617 case DKIOCINFO: { 2618 struct dk_cinfo info; 2619 2620 /* controller information */ 2621 if (XD_IS_CD(vdp)) 2622 info.dki_ctype = DKC_CDROM; 2623 else 2624 info.dki_ctype = DKC_VBD; 2625 2626 info.dki_cnum = 0; 2627 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2628 2629 /* unit information */ 2630 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2631 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2632 info.dki_flags = DKI_FMTVOL; 2633 info.dki_partition = part; 2634 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2635 info.dki_addr = 0; 2636 info.dki_space = 0; 2637 info.dki_prio = 0; 2638 info.dki_vec = 0; 2639 2640 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2641 return (EFAULT); 2642 return (0); 2643 } 2644 case DKIOCSTATE: { 2645 enum dkio_state mstate; 2646 2647 if (ddi_copyin((void *)arg, &mstate, 2648 sizeof (mstate), mode) != 0) 2649 return (EFAULT); 2650 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2651 return (rv); 2652 mstate = vdp->xdf_mstate; 2653 if (ddi_copyout(&mstate, (void *)arg, 2654 sizeof (mstate), mode) != 0) 2655 return (EFAULT); 2656 return (0); 2657 } 2658 case DKIOCREMOVABLE: { 2659 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2660 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2661 return (EFAULT); 2662 return (0); 2663 } 2664 case DKIOCGETWCE: { 2665 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2666 if (ddi_copyout(&i, 
(void *)arg, sizeof (i), mode)) 2667 return (EFAULT); 2668 return (0); 2669 } 2670 case DKIOCSETWCE: { 2671 int i; 2672 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2673 return (EFAULT); 2674 vdp->xdf_wce = VOID2BOOLEAN(i); 2675 return (0); 2676 } 2677 case DKIOCFLUSHWRITECACHE: { 2678 struct dk_callback *dkc = (struct dk_callback *)arg; 2679 2680 if (vdp->xdf_flush_supported) { 2681 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2682 NULL, 0, 0, (void *)dev); 2683 } else if (vdp->xdf_feature_barrier && 2684 !xdf_barrier_flush_disable) { 2685 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2686 vdp->xdf_cache_flush_block, xdf_flush_block, 2687 vdp->xdf_xdev_secsize, (void *)dev); 2688 } else { 2689 return (ENOTTY); 2690 } 2691 if ((mode & FKIOCTL) && (dkc != NULL) && 2692 (dkc->dkc_callback != NULL)) { 2693 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2694 /* need to return 0 after calling callback */ 2695 rv = 0; 2696 } 2697 return (rv); 2698 } 2699 } 2700 /*NOTREACHED*/ 2701 } 2702 2703 static int 2704 xdf_strategy(struct buf *bp) 2705 { 2706 xdf_t *vdp; 2707 minor_t minor; 2708 diskaddr_t p_blkct, p_blkst; 2709 daddr_t blkno; 2710 ulong_t nblks; 2711 int part; 2712 2713 minor = getminor(bp->b_edev); 2714 part = XDF_PART(minor); 2715 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2716 2717 mutex_enter(&vdp->xdf_dev_lk); 2718 if (!xdf_isopen(vdp, part)) { 2719 mutex_exit(&vdp->xdf_dev_lk); 2720 xdf_io_err(bp, ENXIO, 0); 2721 return (0); 2722 } 2723 2724 /* We don't allow IO from the oe_change callback thread */ 2725 ASSERT(curthread != vdp->xdf_oe_change_thread); 2726 2727 /* Check for writes to a read only device */ 2728 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2729 mutex_exit(&vdp->xdf_dev_lk); 2730 xdf_io_err(bp, EROFS, 0); 2731 return (0); 2732 } 2733 2734 /* Check if this I/O is accessing a partition or the entire disk */ 2735 if ((long)bp->b_private == XB_SLICE_NONE) { 2736 /* This I/O is using an absolute offset */ 2737 p_blkct = vdp->xdf_xdev_nblocks; 2738 p_blkst 
= 0; 2739 } else { 2740 /* This I/O is using a partition relative offset */ 2741 mutex_exit(&vdp->xdf_dev_lk); 2742 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2743 &p_blkst, NULL, NULL, NULL)) { 2744 xdf_io_err(bp, ENXIO, 0); 2745 return (0); 2746 } 2747 mutex_enter(&vdp->xdf_dev_lk); 2748 } 2749 2750 /* 2751 * Adjust the real blkno and bcount according to the underline 2752 * physical sector size. 2753 */ 2754 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); 2755 2756 /* check for a starting block beyond the disk or partition limit */ 2757 if (blkno > p_blkct) { 2758 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2759 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); 2760 mutex_exit(&vdp->xdf_dev_lk); 2761 xdf_io_err(bp, EINVAL, 0); 2762 return (0); 2763 } 2764 2765 /* Legacy: don't set error flag at this case */ 2766 if (blkno == p_blkct) { 2767 mutex_exit(&vdp->xdf_dev_lk); 2768 bp->b_resid = bp->b_bcount; 2769 biodone(bp); 2770 return (0); 2771 } 2772 2773 /* sanitize the input buf */ 2774 bioerror(bp, 0); 2775 bp->b_resid = 0; 2776 bp->av_back = bp->av_forw = NULL; 2777 2778 /* Adjust for partial transfer, this will result in an error later */ 2779 if (vdp->xdf_xdev_secsize != 0 && 2780 vdp->xdf_xdev_secsize != XB_BSIZE) { 2781 nblks = bp->b_bcount / vdp->xdf_xdev_secsize; 2782 } else { 2783 nblks = bp->b_bcount >> XB_BSHIFT; 2784 } 2785 2786 if ((blkno + nblks) > p_blkct) { 2787 if (vdp->xdf_xdev_secsize != 0 && 2788 vdp->xdf_xdev_secsize != XB_BSIZE) { 2789 bp->b_resid = 2790 ((blkno + nblks) - p_blkct) * 2791 vdp->xdf_xdev_secsize; 2792 } else { 2793 bp->b_resid = 2794 ((blkno + nblks) - p_blkct) << 2795 XB_BSHIFT; 2796 } 2797 bp->b_bcount -= bp->b_resid; 2798 } 2799 2800 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2801 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); 2802 2803 /* Fix up the buf struct */ 2804 bp->b_flags |= B_BUSY; 2805 bp->b_private = (void *)(uintptr_t)p_blkst; 2806 2807 
xdf_bp_push(vdp, bp); 2808 mutex_exit(&vdp->xdf_dev_lk); 2809 xdf_io_start(vdp); 2810 if (do_polled_io) 2811 (void) xdf_ring_drain(vdp); 2812 return (0); 2813 } 2814 2815 /*ARGSUSED*/ 2816 static int 2817 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2818 { 2819 xdf_t *vdp; 2820 minor_t minor; 2821 diskaddr_t p_blkcnt; 2822 int part; 2823 2824 minor = getminor(dev); 2825 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2826 return (ENXIO); 2827 2828 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2829 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2830 2831 part = XDF_PART(minor); 2832 if (!xdf_isopen(vdp, part)) 2833 return (ENXIO); 2834 2835 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2836 NULL, NULL, NULL, NULL)) 2837 return (ENXIO); 2838 2839 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2840 return (ENOSPC); 2841 2842 if (U_INVAL(uiop)) 2843 return (EINVAL); 2844 2845 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2846 } 2847 2848 /*ARGSUSED*/ 2849 static int 2850 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2851 { 2852 xdf_t *vdp; 2853 minor_t minor; 2854 diskaddr_t p_blkcnt; 2855 int part; 2856 2857 minor = getminor(dev); 2858 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2859 return (ENXIO); 2860 2861 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2862 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2863 2864 part = XDF_PART(minor); 2865 if (!xdf_isopen(vdp, part)) 2866 return (ENXIO); 2867 2868 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2869 NULL, NULL, NULL, NULL)) 2870 return (ENXIO); 2871 2872 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2873 return (ENOSPC); 2874 2875 if (U_INVAL(uiop)) 2876 return (EINVAL); 2877 2878 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2879 } 2880 2881 /*ARGSUSED*/ 2882 static int 2883 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2884 { 2885 xdf_t *vdp; 2886 minor_t minor; 2887 struct uio *uiop 
= aiop->aio_uio; 2888 diskaddr_t p_blkcnt; 2889 int part; 2890 2891 minor = getminor(dev); 2892 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2893 return (ENXIO); 2894 2895 part = XDF_PART(minor); 2896 if (!xdf_isopen(vdp, part)) 2897 return (ENXIO); 2898 2899 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2900 NULL, NULL, NULL, NULL)) 2901 return (ENXIO); 2902 2903 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2904 return (ENOSPC); 2905 2906 if (U_INVAL(uiop)) 2907 return (EINVAL); 2908 2909 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2910 } 2911 2912 /*ARGSUSED*/ 2913 static int 2914 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2915 { 2916 xdf_t *vdp; 2917 minor_t minor; 2918 struct uio *uiop = aiop->aio_uio; 2919 diskaddr_t p_blkcnt; 2920 int part; 2921 2922 minor = getminor(dev); 2923 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2924 return (ENXIO); 2925 2926 part = XDF_PART(minor); 2927 if (!xdf_isopen(vdp, part)) 2928 return (ENXIO); 2929 2930 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2931 NULL, NULL, NULL, NULL)) 2932 return (ENXIO); 2933 2934 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2935 return (ENOSPC); 2936 2937 if (U_INVAL(uiop)) 2938 return (EINVAL); 2939 2940 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2941 } 2942 2943 static int 2944 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2945 { 2946 struct buf dumpbuf, *dbp = &dumpbuf; 2947 xdf_t *vdp; 2948 minor_t minor; 2949 int err = 0; 2950 int part; 2951 diskaddr_t p_blkcnt, p_blkst; 2952 2953 minor = getminor(dev); 2954 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2955 return (ENXIO); 2956 2957 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2958 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2959 2960 /* We don't allow IO from the oe_change callback thread */ 2961 ASSERT(curthread != vdp->xdf_oe_change_thread); 2962 2963 
part = XDF_PART(minor); 2964 if (!xdf_isopen(vdp, part)) 2965 return (ENXIO); 2966 2967 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 2968 NULL, NULL, NULL)) 2969 return (ENXIO); 2970 2971 if ((blkno + nblk) > 2972 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 2973 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 2974 vdp->xdf_addr, (daddr_t)((blkno + nblk) / 2975 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 2976 return (EINVAL); 2977 } 2978 2979 bioinit(dbp); 2980 dbp->b_flags = B_BUSY; 2981 dbp->b_un.b_addr = addr; 2982 dbp->b_bcount = nblk << DEV_BSHIFT; 2983 dbp->b_blkno = blkno; 2984 dbp->b_edev = dev; 2985 dbp->b_private = (void *)(uintptr_t)p_blkst; 2986 2987 mutex_enter(&vdp->xdf_dev_lk); 2988 xdf_bp_push(vdp, dbp); 2989 mutex_exit(&vdp->xdf_dev_lk); 2990 xdf_io_start(vdp); 2991 err = xdf_ring_drain(vdp); 2992 biofini(dbp); 2993 return (err); 2994 } 2995 2996 /*ARGSUSED*/ 2997 static int 2998 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 2999 { 3000 minor_t minor; 3001 xdf_t *vdp; 3002 int part; 3003 ulong_t parbit; 3004 3005 minor = getminor(dev); 3006 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3007 return (ENXIO); 3008 3009 mutex_enter(&vdp->xdf_dev_lk); 3010 part = XDF_PART(minor); 3011 if (!xdf_isopen(vdp, part)) { 3012 mutex_exit(&vdp->xdf_dev_lk); 3013 return (ENXIO); 3014 } 3015 parbit = 1 << part; 3016 3017 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3018 if (otyp == OTYP_LYR) { 3019 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3020 if (--vdp->xdf_vd_lyropen[part] == 0) 3021 vdp->xdf_vd_open[otyp] &= ~parbit; 3022 } else { 3023 vdp->xdf_vd_open[otyp] &= ~parbit; 3024 } 3025 vdp->xdf_vd_exclopen &= ~parbit; 3026 3027 mutex_exit(&vdp->xdf_dev_lk); 3028 return (0); 3029 } 3030 3031 static int 3032 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3033 { 3034 minor_t minor; 3035 xdf_t *vdp; 3036 int part; 3037 ulong_t parbit; 3038 diskaddr_t p_blkct = 0; 3039 
boolean_t firstopen; 3040 boolean_t nodelay; 3041 3042 minor = getminor(*devp); 3043 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3044 return (ENXIO); 3045 3046 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3047 3048 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3049 3050 /* do cv_wait until connected or failed */ 3051 mutex_enter(&vdp->xdf_cb_lk); 3052 mutex_enter(&vdp->xdf_dev_lk); 3053 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3054 mutex_exit(&vdp->xdf_dev_lk); 3055 mutex_exit(&vdp->xdf_cb_lk); 3056 return (ENXIO); 3057 } 3058 mutex_exit(&vdp->xdf_cb_lk); 3059 3060 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3061 mutex_exit(&vdp->xdf_dev_lk); 3062 return (EROFS); 3063 } 3064 3065 part = XDF_PART(minor); 3066 parbit = 1 << part; 3067 if ((vdp->xdf_vd_exclopen & parbit) || 3068 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3069 mutex_exit(&vdp->xdf_dev_lk); 3070 return (EBUSY); 3071 } 3072 3073 /* are we the first one to open this node? */ 3074 firstopen = !xdf_isopen(vdp, -1); 3075 3076 if (otyp == OTYP_LYR) 3077 vdp->xdf_vd_lyropen[part]++; 3078 3079 vdp->xdf_vd_open[otyp] |= parbit; 3080 3081 if (flag & FEXCL) 3082 vdp->xdf_vd_exclopen |= parbit; 3083 3084 mutex_exit(&vdp->xdf_dev_lk); 3085 3086 /* force a re-validation */ 3087 if (firstopen) 3088 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3089 3090 /* If this is a non-blocking open then we're done */ 3091 if (nodelay) 3092 return (0); 3093 3094 /* 3095 * This is a blocking open, so we require: 3096 * - that the disk have a valid label on it 3097 * - that the size of the partition that we're opening is non-zero 3098 */ 3099 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3100 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3101 (void) xdf_close(*devp, flag, otyp, credp); 3102 return (ENXIO); 3103 } 3104 3105 return (0); 3106 } 3107 3108 /*ARGSUSED*/ 3109 static void 3110 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3111 { 3112 xdf_t *vdp = 
(xdf_t *)ddi_get_driver_private(dip); 3113 cv_broadcast(&vdp->xdf_hp_status_cv); 3114 } 3115 3116 static int 3117 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3118 char *name, caddr_t valuep, int *lengthp) 3119 { 3120 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3121 3122 /* 3123 * Sanity check that if a dev_t or dip were specified that they 3124 * correspond to this device driver. On debug kernels we'll 3125 * panic and on non-debug kernels we'll return failure. 3126 */ 3127 ASSERT(ddi_driver_major(dip) == xdf_major); 3128 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3129 if ((ddi_driver_major(dip) != xdf_major) || 3130 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3131 return (DDI_PROP_NOT_FOUND); 3132 3133 if (vdp == NULL) 3134 return (ddi_prop_op(dev, dip, prop_op, flags, 3135 name, valuep, lengthp)); 3136 3137 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3138 dev, dip, prop_op, flags, name, valuep, lengthp, 3139 XDF_PART(getminor(dev)), NULL)); 3140 } 3141 3142 /*ARGSUSED*/ 3143 static int 3144 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3145 { 3146 int instance = XDF_INST(getminor((dev_t)arg)); 3147 xdf_t *vbdp; 3148 3149 switch (cmd) { 3150 case DDI_INFO_DEVT2DEVINFO: 3151 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3152 *rp = NULL; 3153 return (DDI_FAILURE); 3154 } 3155 *rp = vbdp->xdf_dip; 3156 return (DDI_SUCCESS); 3157 3158 case DDI_INFO_DEVT2INSTANCE: 3159 *rp = (void *)(uintptr_t)instance; 3160 return (DDI_SUCCESS); 3161 3162 default: 3163 return (DDI_FAILURE); 3164 } 3165 } 3166 3167 /*ARGSUSED*/ 3168 static int 3169 xdf_resume(dev_info_t *dip) 3170 { 3171 xdf_t *vdp; 3172 char *oename; 3173 3174 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3175 goto err; 3176 3177 if (xdf_debug & SUSRES_DBG) 3178 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3179 3180 mutex_enter(&vdp->xdf_cb_lk); 3181 3182 if 
(xvdi_resume(dip) != DDI_SUCCESS) { 3183 mutex_exit(&vdp->xdf_cb_lk); 3184 goto err; 3185 } 3186 3187 if (((oename = xvdi_get_oename(dip)) == NULL) || 3188 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3189 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3190 mutex_exit(&vdp->xdf_cb_lk); 3191 goto err; 3192 } 3193 3194 mutex_enter(&vdp->xdf_dev_lk); 3195 ASSERT(vdp->xdf_state != XD_READY); 3196 xdf_set_state(vdp, XD_UNKNOWN); 3197 mutex_exit(&vdp->xdf_dev_lk); 3198 3199 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3200 mutex_exit(&vdp->xdf_cb_lk); 3201 goto err; 3202 } 3203 3204 mutex_exit(&vdp->xdf_cb_lk); 3205 3206 if (xdf_debug & SUSRES_DBG) 3207 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3208 return (DDI_SUCCESS); 3209 err: 3210 if (xdf_debug & SUSRES_DBG) 3211 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3212 return (DDI_FAILURE); 3213 } 3214 3215 static int 3216 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3217 { 3218 int n, instance = ddi_get_instance(dip); 3219 ddi_iblock_cookie_t ibc, softibc; 3220 boolean_t dev_iscd = B_FALSE; 3221 xdf_t *vdp; 3222 char *oename, *xsname, *str; 3223 3224 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3225 "xdf_debug", 0)) != 0) 3226 xdf_debug = n; 3227 3228 switch (cmd) { 3229 case DDI_RESUME: 3230 return (xdf_resume(dip)); 3231 case DDI_ATTACH: 3232 break; 3233 default: 3234 return (DDI_FAILURE); 3235 } 3236 /* DDI_ATTACH */ 3237 3238 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 3239 ((oename = xvdi_get_oename(dip)) == NULL)) 3240 return (DDI_FAILURE); 3241 3242 /* 3243 * Disable auto-detach. This is necessary so that we don't get 3244 * detached while we're disconnected from the back end. 
3245 */ 3246 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3247 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3248 return (DDI_FAILURE); 3249 3250 /* driver handles kernel-issued IOCTLs */ 3251 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3252 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3253 return (DDI_FAILURE); 3254 3255 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3256 return (DDI_FAILURE); 3257 3258 if (ddi_get_soft_iblock_cookie(dip, 3259 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3260 return (DDI_FAILURE); 3261 3262 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3263 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3264 ddi_get_name_addr(dip)); 3265 return (DDI_FAILURE); 3266 } 3267 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3268 dev_iscd = B_TRUE; 3269 strfree(str); 3270 3271 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3272 return (DDI_FAILURE); 3273 3274 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3275 vdp = ddi_get_soft_state(xdf_ssp, instance); 3276 ddi_set_driver_private(dip, vdp); 3277 vdp->xdf_dip = dip; 3278 vdp->xdf_addr = ddi_get_name_addr(dip); 3279 vdp->xdf_suspending = B_FALSE; 3280 vdp->xdf_media_req_supported = B_FALSE; 3281 vdp->xdf_peer = INVALID_DOMID; 3282 vdp->xdf_evtchn = INVALID_EVTCHN; 3283 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3284 offsetof(v_req_t, v_link)); 3285 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3286 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3287 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3288 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3289 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3290 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3291 vdp->xdf_cmbl_reattach = B_TRUE; 3292 if (dev_iscd) { 3293 vdp->xdf_dinfo |= VDISK_CDROM; 3294 vdp->xdf_mstate = DKIO_EJECTED; 3295 } else { 3296 vdp->xdf_mstate = DKIO_NONE; 3297 } 3298 3299 if 
((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3300 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3301 goto errout0; 3302 3303 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3304 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3305 goto errout0; 3306 3307 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3308 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3309 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3310 ddi_get_name_addr(dip)); 3311 goto errout0; 3312 } 3313 3314 /* 3315 * Initialize the physical geometry stucture. Note that currently 3316 * we don't know the size of the backend device so the number 3317 * of blocks on the device will be initialized to zero. Once 3318 * we connect to the backend device we'll update the physical 3319 * geometry to reflect the real size of the device. 3320 */ 3321 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3322 vdp->xdf_pgeom_fixed = B_FALSE; 3323 3324 /* 3325 * create default device minor nodes: non-removable disk 3326 * we will adjust minor nodes after we are connected w/ backend 3327 */ 3328 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3329 if (xdf_cmlb_attach(vdp) != 0) { 3330 cmn_err(CE_WARN, 3331 "xdf@%s: attach failed, cmlb attach failed", 3332 ddi_get_name_addr(dip)); 3333 goto errout0; 3334 } 3335 3336 /* 3337 * We ship with cache-enabled disks 3338 */ 3339 vdp->xdf_wce = B_TRUE; 3340 3341 mutex_enter(&vdp->xdf_cb_lk); 3342 /* Watch backend XenbusState change */ 3343 if (xvdi_add_event_handler(dip, 3344 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3345 mutex_exit(&vdp->xdf_cb_lk); 3346 goto errout0; 3347 } 3348 3349 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3350 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3351 ddi_get_name_addr(dip)); 3352 mutex_exit(&vdp->xdf_cb_lk); 3353 goto errout1; 3354 } 3355 mutex_exit(&vdp->xdf_cb_lk); 3356 3357 #if defined(XPV_HVM_DRIVER) 3358 3359 xdf_hvm_add(dip); 3360 3361 /* Report our version to dom0. 
*/ 3362 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3363 HVMPV_XDF_VERS)) 3364 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3365 3366 #else /* !XPV_HVM_DRIVER */ 3367 3368 /* create kstat for iostat(1M) */ 3369 if (xdf_kstat_create(dip, "xdf", instance) != 0) { 3370 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3371 ddi_get_name_addr(dip)); 3372 goto errout1; 3373 } 3374 3375 #endif /* !XPV_HVM_DRIVER */ 3376 3377 ddi_report_dev(dip); 3378 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3379 return (DDI_SUCCESS); 3380 3381 errout1: 3382 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3383 xvdi_remove_event_handler(dip, XS_OE_STATE); 3384 errout0: 3385 if (vdp->xdf_vd_lbl != NULL) { 3386 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3387 cmlb_free_handle(&vdp->xdf_vd_lbl); 3388 vdp->xdf_vd_lbl = NULL; 3389 } 3390 if (vdp->xdf_softintr_id != NULL) 3391 ddi_remove_softintr(vdp->xdf_softintr_id); 3392 xvdi_remove_xb_watch_handlers(dip); 3393 if (vdp->xdf_ready_tq != NULL) 3394 ddi_taskq_destroy(vdp->xdf_ready_tq); 3395 mutex_destroy(&vdp->xdf_cb_lk); 3396 mutex_destroy(&vdp->xdf_dev_lk); 3397 cv_destroy(&vdp->xdf_dev_cv); 3398 cv_destroy(&vdp->xdf_hp_status_cv); 3399 ddi_soft_state_free(xdf_ssp, instance); 3400 ddi_set_driver_private(dip, NULL); 3401 ddi_prop_remove_all(dip); 3402 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3403 return (DDI_FAILURE); 3404 } 3405 3406 static int 3407 xdf_suspend(dev_info_t *dip) 3408 { 3409 int instance = ddi_get_instance(dip); 3410 xdf_t *vdp; 3411 3412 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3413 return (DDI_FAILURE); 3414 3415 if (xdf_debug & SUSRES_DBG) 3416 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3417 3418 xvdi_suspend(dip); 3419 3420 mutex_enter(&vdp->xdf_cb_lk); 3421 mutex_enter(&vdp->xdf_dev_lk); 3422 3423 vdp->xdf_suspending = B_TRUE; 3424 xdf_ring_destroy(vdp); 3425 xdf_set_state(vdp, XD_SUSPEND); 3426 vdp->xdf_suspending = 
B_FALSE; 3427 3428 mutex_exit(&vdp->xdf_dev_lk); 3429 mutex_exit(&vdp->xdf_cb_lk); 3430 3431 if (xdf_debug & SUSRES_DBG) 3432 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3433 3434 return (DDI_SUCCESS); 3435 } 3436 3437 static int 3438 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3439 { 3440 xdf_t *vdp; 3441 int instance; 3442 3443 switch (cmd) { 3444 3445 case DDI_PM_SUSPEND: 3446 break; 3447 3448 case DDI_SUSPEND: 3449 return (xdf_suspend(dip)); 3450 3451 case DDI_DETACH: 3452 break; 3453 3454 default: 3455 return (DDI_FAILURE); 3456 } 3457 3458 instance = ddi_get_instance(dip); 3459 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3460 vdp = ddi_get_soft_state(xdf_ssp, instance); 3461 3462 if (vdp == NULL) 3463 return (DDI_FAILURE); 3464 3465 mutex_enter(&vdp->xdf_cb_lk); 3466 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3467 if (vdp->xdf_state != XD_CLOSED) { 3468 mutex_exit(&vdp->xdf_cb_lk); 3469 return (DDI_FAILURE); 3470 } 3471 mutex_exit(&vdp->xdf_cb_lk); 3472 3473 ASSERT(!ISDMACBON(vdp)); 3474 3475 #if defined(XPV_HVM_DRIVER) 3476 xdf_hvm_rm(dip); 3477 #endif /* XPV_HVM_DRIVER */ 3478 3479 if (vdp->xdf_timeout_id != 0) 3480 (void) untimeout(vdp->xdf_timeout_id); 3481 3482 xvdi_remove_event_handler(dip, XS_OE_STATE); 3483 ddi_taskq_destroy(vdp->xdf_ready_tq); 3484 3485 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3486 cmlb_free_handle(&vdp->xdf_vd_lbl); 3487 3488 /* we'll support backend running in domU later */ 3489 #ifdef DOMU_BACKEND 3490 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3491 #endif 3492 3493 list_destroy(&vdp->xdf_vreq_act); 3494 ddi_prop_remove_all(dip); 3495 xdf_kstat_delete(dip); 3496 ddi_remove_softintr(vdp->xdf_softintr_id); 3497 xvdi_remove_xb_watch_handlers(dip); 3498 ddi_set_driver_private(dip, NULL); 3499 cv_destroy(&vdp->xdf_dev_cv); 3500 mutex_destroy(&vdp->xdf_cb_lk); 3501 mutex_destroy(&vdp->xdf_dev_lk); 3502 if (vdp->xdf_cache_flush_block != NULL) 3503 kmem_free(vdp->xdf_flush_mem, 2 * 
vdp->xdf_xdev_secsize); 3504 ddi_soft_state_free(xdf_ssp, instance); 3505 return (DDI_SUCCESS); 3506 } 3507 3508 /* 3509 * Driver linkage structures. 3510 */ 3511 static struct cb_ops xdf_cbops = { 3512 xdf_open, 3513 xdf_close, 3514 xdf_strategy, 3515 nodev, 3516 xdf_dump, 3517 xdf_read, 3518 xdf_write, 3519 xdf_ioctl, 3520 nodev, 3521 nodev, 3522 nodev, 3523 nochpoll, 3524 xdf_prop_op, 3525 NULL, 3526 D_MP | D_NEW | D_64BIT, 3527 CB_REV, 3528 xdf_aread, 3529 xdf_awrite 3530 }; 3531 3532 struct dev_ops xdf_devops = { 3533 DEVO_REV, /* devo_rev */ 3534 0, /* devo_refcnt */ 3535 xdf_getinfo, /* devo_getinfo */ 3536 nulldev, /* devo_identify */ 3537 nulldev, /* devo_probe */ 3538 xdf_attach, /* devo_attach */ 3539 xdf_detach, /* devo_detach */ 3540 nodev, /* devo_reset */ 3541 &xdf_cbops, /* devo_cb_ops */ 3542 NULL, /* devo_bus_ops */ 3543 NULL, /* devo_power */ 3544 ddi_quiesce_not_supported, /* devo_quiesce */ 3545 }; 3546 3547 /* 3548 * Module linkage structures. 3549 */ 3550 static struct modldrv modldrv = { 3551 &mod_driverops, /* Type of module. 
This one is a driver */ 3552 "virtual block driver", /* short description */ 3553 &xdf_devops /* driver specific ops */ 3554 }; 3555 3556 static struct modlinkage xdf_modlinkage = { 3557 MODREV_1, (void *)&modldrv, NULL 3558 }; 3559 3560 /* 3561 * standard module entry points 3562 */ 3563 int 3564 _init(void) 3565 { 3566 int rc; 3567 3568 xdf_major = ddi_name_to_major("xdf"); 3569 if (xdf_major == (major_t)-1) 3570 return (EINVAL); 3571 3572 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3573 return (rc); 3574 3575 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3576 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3577 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3578 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3579 3580 #if defined(XPV_HVM_DRIVER) 3581 xdf_hvm_init(); 3582 #endif /* XPV_HVM_DRIVER */ 3583 3584 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3585 #if defined(XPV_HVM_DRIVER) 3586 xdf_hvm_fini(); 3587 #endif /* XPV_HVM_DRIVER */ 3588 kmem_cache_destroy(xdf_vreq_cache); 3589 kmem_cache_destroy(xdf_gs_cache); 3590 ddi_soft_state_fini(&xdf_ssp); 3591 return (rc); 3592 } 3593 3594 return (rc); 3595 } 3596 3597 int 3598 _fini(void) 3599 { 3600 int err; 3601 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3602 return (err); 3603 3604 #if defined(XPV_HVM_DRIVER) 3605 xdf_hvm_fini(); 3606 #endif /* XPV_HVM_DRIVER */ 3607 3608 kmem_cache_destroy(xdf_vreq_cache); 3609 kmem_cache_destroy(xdf_gs_cache); 3610 ddi_soft_state_fini(&xdf_ssp); 3611 3612 return (0); 3613 } 3614 3615 int 3616 _info(struct modinfo *modinfop) 3617 { 3618 return (mod_info(&xdf_modlinkage, modinfop)); 3619 } 3620