/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports Solaris disk device nodes, accepts IO requests from
 * those nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notification of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
68 */ 69 70 #include <io/xdf.h> 71 72 #include <sys/conf.h> 73 #include <sys/dkio.h> 74 #include <sys/promif.h> 75 #include <sys/sysmacros.h> 76 #include <sys/kstat.h> 77 #include <sys/mach_mmu.h> 78 #ifdef XPV_HVM_DRIVER 79 #include <sys/xpv_support.h> 80 #include <sys/sunndi.h> 81 #else /* !XPV_HVM_DRIVER */ 82 #include <sys/evtchn_impl.h> 83 #endif /* !XPV_HVM_DRIVER */ 84 #include <public/io/xenbus.h> 85 #include <xen/sys/xenbus_impl.h> 86 #include <sys/scsi/generic/inquiry.h> 87 #include <xen/io/blkif_impl.h> 88 #include <sys/fdio.h> 89 #include <sys/cdio.h> 90 91 /* 92 * DEBUG_EVAL can be used to include debug only statements without 93 * having to use '#ifdef DEBUG' statements 94 */ 95 #ifdef DEBUG 96 #define DEBUG_EVAL(x) (x) 97 #else /* !DEBUG */ 98 #define DEBUG_EVAL(x) 99 #endif /* !DEBUG */ 100 101 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ 102 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ 103 104 #define INVALID_DOMID ((domid_t)-1) 105 #define FLUSH_DISKCACHE 0x1 106 #define WRITE_BARRIER 0x2 107 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 108 #define USE_WRITE_BARRIER(vdp) \ 109 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 110 #define USE_FLUSH_DISKCACHE(vdp) \ 111 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 112 #define IS_WRITE_BARRIER(vdp, bp) \ 113 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 114 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 115 #define IS_FLUSH_DISKCACHE(bp) \ 116 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 117 118 #define VREQ_DONE(vreq) \ 119 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ 120 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ 121 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) 122 123 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) 124 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) 125 126 extern int do_polled_io; 127 128 /* run-time tunables that we don't want the compiler to 
optimize away */ 129 volatile int xdf_debug = 0; 130 volatile boolean_t xdf_barrier_flush_disable = B_FALSE; 131 132 /* per module globals */ 133 major_t xdf_major; 134 static void *xdf_ssp; 135 static kmem_cache_t *xdf_vreq_cache; 136 static kmem_cache_t *xdf_gs_cache; 137 static int xdf_maxphys = XB_MAXPHYS; 138 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 139 static int xdf_fbrewrites; /* flush block re-write count */ 140 141 /* misc public functions (used by xdf_shell.c) */ 142 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 143 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 144 145 /* misc private functions */ 146 static void xdf_io_start(xdf_t *); 147 148 /* callbacks from commmon label */ 149 static cmlb_tg_ops_t xdf_lb_ops = { 150 TG_DK_OPS_VERSION_1, 151 xdf_lb_rdwr, 152 xdf_lb_getinfo 153 }; 154 155 /* 156 * I/O buffer DMA attributes 157 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 158 */ 159 static ddi_dma_attr_t xb_dma_attr = { 160 DMA_ATTR_V0, 161 (uint64_t)0, /* lowest address */ 162 (uint64_t)0xffffffffffffffff, /* highest usable address */ 163 (uint64_t)0xffffff, /* DMA counter limit max */ 164 (uint64_t)XB_BSIZE, /* alignment in bytes */ 165 XB_BSIZE - 1, /* bitmap of burst sizes */ 166 XB_BSIZE, /* min transfer */ 167 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 168 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 169 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 170 XB_BSIZE, /* granularity */ 171 0, /* flags (reserved) */ 172 }; 173 174 static ddi_device_acc_attr_t xc_acc_attr = { 175 DDI_DEVICE_ATTR_V0, 176 DDI_NEVERSWAP_ACC, 177 DDI_STRICTORDER_ACC 178 }; 179 180 static void 181 xdf_timeout_handler(void *arg) 182 { 183 xdf_t *vdp = arg; 184 185 mutex_enter(&vdp->xdf_dev_lk); 186 vdp->xdf_timeout_id = 0; 187 mutex_exit(&vdp->xdf_dev_lk); 188 189 /* new timeout thread could be re-scheduled */ 190 xdf_io_start(vdp); 191 } 192 193 /* 194 * callback 
func when DMA/GTE resources is available 195 * 196 * Note: we only register one callback function to grant table subsystem 197 * since we only have one 'struct gnttab_free_callback' in xdf_t. 198 */ 199 static int 200 xdf_dmacallback(caddr_t arg) 201 { 202 xdf_t *vdp = (xdf_t *)arg; 203 ASSERT(vdp != NULL); 204 205 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 206 vdp->xdf_addr)); 207 208 ddi_trigger_softintr(vdp->xdf_softintr_id); 209 return (DDI_DMA_CALLBACK_DONE); 210 } 211 212 static ge_slot_t * 213 gs_get(xdf_t *vdp, int isread) 214 { 215 grant_ref_t gh; 216 ge_slot_t *gs; 217 218 /* try to alloc GTEs needed in this slot, first */ 219 if (gnttab_alloc_grant_references( 220 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 221 if (vdp->xdf_gnt_callback.next == NULL) { 222 SETDMACBON(vdp); 223 gnttab_request_free_callback( 224 &vdp->xdf_gnt_callback, 225 (void (*)(void *))xdf_dmacallback, 226 (void *)vdp, 227 BLKIF_MAX_SEGMENTS_PER_REQUEST); 228 } 229 return (NULL); 230 } 231 232 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 233 if (gs == NULL) { 234 gnttab_free_grant_references(gh); 235 if (vdp->xdf_timeout_id == 0) 236 /* restart I/O after one second */ 237 vdp->xdf_timeout_id = 238 timeout(xdf_timeout_handler, vdp, hz); 239 return (NULL); 240 } 241 242 /* init gs_slot */ 243 gs->gs_oeid = vdp->xdf_peer; 244 gs->gs_isread = isread; 245 gs->gs_ghead = gh; 246 gs->gs_ngrefs = 0; 247 248 return (gs); 249 } 250 251 static void 252 gs_free(ge_slot_t *gs) 253 { 254 int i; 255 256 /* release all grant table entry resources used in this slot */ 257 for (i = 0; i < gs->gs_ngrefs; i++) 258 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 259 gnttab_free_grant_references(gs->gs_ghead); 260 list_remove(&gs->gs_vreq->v_gs, gs); 261 kmem_cache_free(xdf_gs_cache, gs); 262 } 263 264 static grant_ref_t 265 gs_grant(ge_slot_t *gs, mfn_t mfn) 266 { 267 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); 268 269 ASSERT(gr != -1); 270 ASSERT(gs->gs_ngrefs 
< BLKIF_MAX_SEGMENTS_PER_REQUEST); 271 gs->gs_ge[gs->gs_ngrefs++] = gr; 272 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 273 274 return (gr); 275 } 276 277 /* 278 * Alloc a vreq for this bp 279 * bp->av_back contains the pointer to the vreq upon return 280 */ 281 static v_req_t * 282 vreq_get(xdf_t *vdp, buf_t *bp) 283 { 284 v_req_t *vreq = NULL; 285 286 ASSERT(BP_VREQ(bp) == NULL); 287 288 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 289 if (vreq == NULL) { 290 if (vdp->xdf_timeout_id == 0) 291 /* restart I/O after one second */ 292 vdp->xdf_timeout_id = 293 timeout(xdf_timeout_handler, vdp, hz); 294 return (NULL); 295 } 296 bzero(vreq, sizeof (v_req_t)); 297 list_create(&vreq->v_gs, sizeof (ge_slot_t), 298 offsetof(ge_slot_t, gs_vreq_link)); 299 vreq->v_buf = bp; 300 vreq->v_status = VREQ_INIT; 301 vreq->v_runq = B_FALSE; 302 BP_VREQ_SET(bp, vreq); 303 /* init of other fields in vreq is up to the caller */ 304 305 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 306 307 return (vreq); 308 } 309 310 static void 311 vreq_free(xdf_t *vdp, v_req_t *vreq) 312 { 313 buf_t *bp = vreq->v_buf; 314 315 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 316 ASSERT(BP_VREQ(bp) == vreq); 317 318 list_remove(&vdp->xdf_vreq_act, vreq); 319 320 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 321 goto done; 322 323 switch (vreq->v_status) { 324 case VREQ_DMAWIN_DONE: 325 case VREQ_GS_ALLOCED: 326 case VREQ_DMABUF_BOUND: 327 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 328 /*FALLTHRU*/ 329 case VREQ_DMAMEM_ALLOCED: 330 if (!ALIGNED_XFER(bp)) { 331 ASSERT(vreq->v_abuf != NULL); 332 if (!IS_ERROR(bp) && IS_READ(bp)) 333 bcopy(vreq->v_abuf, bp->b_un.b_addr, 334 bp->b_bcount); 335 ddi_dma_mem_free(&vreq->v_align); 336 } 337 /*FALLTHRU*/ 338 case VREQ_MEMDMAHDL_ALLOCED: 339 if (!ALIGNED_XFER(bp)) 340 ddi_dma_free_handle(&vreq->v_memdmahdl); 341 /*FALLTHRU*/ 342 case VREQ_DMAHDL_ALLOCED: 343 ddi_dma_free_handle(&vreq->v_dmahdl); 344 break; 345 default: 346 
break; 347 } 348 done: 349 ASSERT(!vreq->v_runq); 350 list_destroy(&vreq->v_gs); 351 kmem_cache_free(xdf_vreq_cache, vreq); 352 } 353 354 /* 355 * Snarf new data if our flush block was re-written 356 */ 357 static void 358 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 359 { 360 int nblks; 361 boolean_t mapin; 362 363 if (IS_WRITE_BARRIER(vdp, bp)) 364 return; /* write was a flush write */ 365 366 mapin = B_FALSE; 367 nblks = bp->b_bcount >> DEV_BSHIFT; 368 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 369 xdf_fbrewrites++; 370 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 371 mapin = B_TRUE; 372 bp_mapin(bp); 373 } 374 bcopy(bp->b_un.b_addr + 375 ((xdf_flush_block - blkno) << DEV_BSHIFT), 376 vdp->xdf_cache_flush_block, DEV_BSIZE); 377 if (mapin) 378 bp_mapout(bp); 379 } 380 } 381 382 /* 383 * Initalize the DMA and grant table resources for the buf 384 */ 385 static int 386 vreq_setup(xdf_t *vdp, v_req_t *vreq) 387 { 388 int rc; 389 ddi_dma_attr_t dmaattr; 390 uint_t ndcs, ndws; 391 ddi_dma_handle_t dh; 392 ddi_dma_handle_t mdh; 393 ddi_dma_cookie_t dc; 394 ddi_acc_handle_t abh; 395 caddr_t aba; 396 ge_slot_t *gs; 397 size_t bufsz; 398 off_t off; 399 size_t sz; 400 buf_t *bp = vreq->v_buf; 401 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 402 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 403 404 switch (vreq->v_status) { 405 case VREQ_INIT: 406 if (IS_FLUSH_DISKCACHE(bp)) { 407 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 408 DPRINTF(DMA_DBG, ("xdf@%s: " 409 "get ge_slotfailed\n", vdp->xdf_addr)); 410 return (DDI_FAILURE); 411 } 412 vreq->v_blkno = 0; 413 vreq->v_nslots = 1; 414 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 415 vreq->v_status = VREQ_GS_ALLOCED; 416 gs->gs_vreq = vreq; 417 list_insert_head(&vreq->v_gs, gs); 418 return (DDI_SUCCESS); 419 } 420 421 if (IS_WRITE_BARRIER(vdp, bp)) 422 vreq->v_flush_diskcache = WRITE_BARRIER; 423 vreq->v_blkno = bp->b_blkno + 424 (diskaddr_t)(uintptr_t)bp->b_private; 425 /* See if we wrote new data to our flush block */ 426 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 427 check_fbwrite(vdp, bp, vreq->v_blkno); 428 vreq->v_status = VREQ_INIT_DONE; 429 /*FALLTHRU*/ 430 431 case VREQ_INIT_DONE: 432 /* 433 * alloc DMA handle 434 */ 435 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 436 xdf_dmacallback, (caddr_t)vdp, &dh); 437 if (rc != DDI_SUCCESS) { 438 SETDMACBON(vdp); 439 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 440 vdp->xdf_addr)); 441 return (DDI_FAILURE); 442 } 443 444 vreq->v_dmahdl = dh; 445 vreq->v_status = VREQ_DMAHDL_ALLOCED; 446 /*FALLTHRU*/ 447 448 case VREQ_DMAHDL_ALLOCED: 449 /* 450 * alloc dma handle for 512-byte aligned buf 451 */ 452 if (!ALIGNED_XFER(bp)) { 453 /* 454 * XXPV: we need to temporarily enlarge the seg 455 * boundary and s/g length to work round CR6381968 456 */ 457 dmaattr = xb_dma_attr; 458 dmaattr.dma_attr_seg = (uint64_t)-1; 459 dmaattr.dma_attr_sgllen = INT_MAX; 460 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 461 xdf_dmacallback, (caddr_t)vdp, &mdh); 462 if (rc != DDI_SUCCESS) { 463 SETDMACBON(vdp); 464 DPRINTF(DMA_DBG, ("xdf@%s: " 465 "unaligned buf DMAhandle alloc failed\n", 466 vdp->xdf_addr)); 467 return (DDI_FAILURE); 468 } 469 vreq->v_memdmahdl = mdh; 470 vreq->v_status 
= VREQ_MEMDMAHDL_ALLOCED; 471 } 472 /*FALLTHRU*/ 473 474 case VREQ_MEMDMAHDL_ALLOCED: 475 /* 476 * alloc 512-byte aligned buf 477 */ 478 if (!ALIGNED_XFER(bp)) { 479 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 480 bp_mapin(bp); 481 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 482 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 483 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 484 &aba, &bufsz, &abh); 485 if (rc != DDI_SUCCESS) { 486 SETDMACBON(vdp); 487 DPRINTF(DMA_DBG, ("xdf@%s: " 488 "DMA mem allocation failed\n", 489 vdp->xdf_addr)); 490 return (DDI_FAILURE); 491 } 492 493 vreq->v_abuf = aba; 494 vreq->v_align = abh; 495 vreq->v_status = VREQ_DMAMEM_ALLOCED; 496 497 ASSERT(bufsz >= bp->b_bcount); 498 if (!IS_READ(bp)) 499 bcopy(bp->b_un.b_addr, vreq->v_abuf, 500 bp->b_bcount); 501 } 502 /*FALLTHRU*/ 503 504 case VREQ_DMAMEM_ALLOCED: 505 /* 506 * dma bind 507 */ 508 if (ALIGNED_XFER(bp)) { 509 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 510 dma_flags, xdf_dmacallback, (caddr_t)vdp, 511 &dc, &ndcs); 512 } else { 513 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 514 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 515 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 516 } 517 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 518 /* get num of dma windows */ 519 if (rc == DDI_DMA_PARTIAL_MAP) { 520 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 521 ASSERT(rc == DDI_SUCCESS); 522 } else { 523 ndws = 1; 524 } 525 } else { 526 SETDMACBON(vdp); 527 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 528 vdp->xdf_addr)); 529 return (DDI_FAILURE); 530 } 531 532 vreq->v_dmac = dc; 533 vreq->v_dmaw = 0; 534 vreq->v_ndmacs = ndcs; 535 vreq->v_ndmaws = ndws; 536 vreq->v_nslots = ndws; 537 vreq->v_status = VREQ_DMABUF_BOUND; 538 /*FALLTHRU*/ 539 540 case VREQ_DMABUF_BOUND: 541 /* 542 * get ge_slot, callback is set upon failure from gs_get(), 543 * if not set previously 544 */ 545 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 546 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 547 
vdp->xdf_addr)); 548 return (DDI_FAILURE); 549 } 550 551 vreq->v_status = VREQ_GS_ALLOCED; 552 gs->gs_vreq = vreq; 553 list_insert_head(&vreq->v_gs, gs); 554 break; 555 556 case VREQ_GS_ALLOCED: 557 /* nothing need to be done */ 558 break; 559 560 case VREQ_DMAWIN_DONE: 561 /* 562 * move to the next dma window 563 */ 564 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 565 566 /* get a ge_slot for this DMA window */ 567 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 568 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 569 vdp->xdf_addr)); 570 return (DDI_FAILURE); 571 } 572 573 vreq->v_dmaw++; 574 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 575 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 576 vreq->v_status = VREQ_GS_ALLOCED; 577 gs->gs_vreq = vreq; 578 list_insert_head(&vreq->v_gs, gs); 579 break; 580 581 default: 582 return (DDI_FAILURE); 583 } 584 585 return (DDI_SUCCESS); 586 } 587 588 static int 589 xdf_cmlb_attach(xdf_t *vdp) 590 { 591 dev_info_t *dip = vdp->xdf_dip; 592 593 return (cmlb_attach(dip, &xdf_lb_ops, 594 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 595 XD_IS_RM(vdp), 596 B_TRUE, 597 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 598 #if defined(XPV_HVM_DRIVER) 599 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) | 600 CMLB_INTERNAL_MINOR_NODES, 601 #else /* !XPV_HVM_DRIVER */ 602 XD_IS_CD(vdp) ? 
0 : CMLB_FAKE_LABEL_ONE_PARTITION, 603 #endif /* !XPV_HVM_DRIVER */ 604 vdp->xdf_vd_lbl, NULL)); 605 } 606 607 static void 608 xdf_io_err(buf_t *bp, int err, size_t resid) 609 { 610 bioerror(bp, err); 611 if (resid == 0) 612 bp->b_resid = bp->b_bcount; 613 biodone(bp); 614 } 615 616 static void 617 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 618 { 619 v_req_t *vreq = BP_VREQ(bp); 620 621 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 622 623 if (vdp->xdf_xdev_iostat == NULL) 624 return; 625 if ((vreq != NULL) && vreq->v_runq) { 626 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 627 } else { 628 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 629 } 630 } 631 632 static void 633 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 634 { 635 v_req_t *vreq = BP_VREQ(bp); 636 637 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 638 639 if (vdp->xdf_xdev_iostat == NULL) 640 return; 641 if ((vreq != NULL) && vreq->v_runq) { 642 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 643 } else { 644 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 645 } 646 } 647 648 static void 649 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 650 { 651 v_req_t *vreq = BP_VREQ(bp); 652 653 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 654 ASSERT(!vreq->v_runq); 655 656 vreq->v_runq = B_TRUE; 657 if (vdp->xdf_xdev_iostat == NULL) 658 return; 659 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 660 } 661 662 static void 663 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 664 { 665 v_req_t *vreq = BP_VREQ(bp); 666 667 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 668 ASSERT(vreq->v_runq); 669 670 vreq->v_runq = B_FALSE; 671 if (vdp->xdf_xdev_iostat == NULL) 672 return; 673 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 674 } 675 676 int 677 xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance) 678 { 679 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 680 kstat_t *kstat; 681 buf_t *bp; 682 683 if ((kstat = kstat_create( 684 ks_module, instance, NULL, "disk", 685 KSTAT_TYPE_IO, 1, 
KSTAT_FLAG_PERSISTENT)) == NULL) 686 return (-1); 687 688 /* See comment about locking in xdf_kstat_delete(). */ 689 mutex_enter(&vdp->xdf_iostat_lk); 690 mutex_enter(&vdp->xdf_dev_lk); 691 692 /* only one kstat can exist at a time */ 693 if (vdp->xdf_xdev_iostat != NULL) { 694 mutex_exit(&vdp->xdf_dev_lk); 695 mutex_exit(&vdp->xdf_iostat_lk); 696 kstat_delete(kstat); 697 return (-1); 698 } 699 700 vdp->xdf_xdev_iostat = kstat; 701 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 702 kstat_install(vdp->xdf_xdev_iostat); 703 704 /* 705 * Now that we've created a kstat, we need to update the waitq and 706 * runq counts for the kstat to reflect our current state. 707 * 708 * For a buf_t structure to be on the runq, it must have a ring 709 * buffer slot associated with it. To get a ring buffer slot the 710 * buf must first have a v_req_t and a ge_slot_t associated with it. 711 * Then when it is granted a ring buffer slot, v_runq will be set to 712 * true. 713 * 714 * For a buf_t structure to be on the waitq, it must not be on the 715 * runq. So to find all the buf_t's that should be on waitq, we 716 * walk the active buf list and add any buf_t's which aren't on the 717 * runq to the waitq. 718 */ 719 bp = vdp->xdf_f_act; 720 while (bp != NULL) { 721 xdf_kstat_enter(vdp, bp); 722 bp = bp->av_forw; 723 } 724 if (vdp->xdf_ready_tq_bp != NULL) 725 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 726 727 mutex_exit(&vdp->xdf_dev_lk); 728 mutex_exit(&vdp->xdf_iostat_lk); 729 return (0); 730 } 731 732 void 733 xdf_kstat_delete(dev_info_t *dip) 734 { 735 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 736 kstat_t *kstat; 737 buf_t *bp; 738 739 /* 740 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 741 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 742 * and the contents of the our kstat. xdf_iostat_lk is used 743 * to protect the allocation and freeing of the actual kstat. 
744 * xdf_dev_lk can't be used for this purpose because kstat 745 * readers use it to access the contents of the kstat and 746 * hence it can't be held when calling kstat_delete(). 747 */ 748 mutex_enter(&vdp->xdf_iostat_lk); 749 mutex_enter(&vdp->xdf_dev_lk); 750 751 if (vdp->xdf_xdev_iostat == NULL) { 752 mutex_exit(&vdp->xdf_dev_lk); 753 mutex_exit(&vdp->xdf_iostat_lk); 754 return; 755 } 756 757 /* 758 * We're about to destroy the kstat structures, so it isn't really 759 * necessary to update the runq and waitq counts. But, since this 760 * isn't a hot code path we can afford to be a little pedantic and 761 * go ahead and decrement the runq and waitq kstat counters to zero 762 * before free'ing them. This helps us ensure that we've gotten all 763 * our accounting correct. 764 * 765 * For an explanation of how we determine which buffers go on the 766 * runq vs which go on the waitq, see the comments in 767 * xdf_kstat_create(). 768 */ 769 bp = vdp->xdf_f_act; 770 while (bp != NULL) { 771 xdf_kstat_exit(vdp, bp); 772 bp = bp->av_forw; 773 } 774 if (vdp->xdf_ready_tq_bp != NULL) 775 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp); 776 777 kstat = vdp->xdf_xdev_iostat; 778 vdp->xdf_xdev_iostat = NULL; 779 mutex_exit(&vdp->xdf_dev_lk); 780 kstat_delete(kstat); 781 mutex_exit(&vdp->xdf_iostat_lk); 782 } 783 784 /* 785 * Add an IO requests onto the active queue. 786 * 787 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs 788 * are used to establish a connection to the backend, so they recieve 789 * priority over all other IOs. Since xdf_ready_tq_thread only does 790 * synchronous IO, there can only be one xdf_ready_tq_thread request at any 791 * given time and we record the buf associated with that request in 792 * xdf_ready_tq_bp. 
793 */ 794 static void 795 xdf_bp_push(xdf_t *vdp, buf_t *bp) 796 { 797 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 798 ASSERT(bp->av_forw == NULL); 799 800 xdf_kstat_enter(vdp, bp); 801 802 if (curthread == vdp->xdf_ready_tq_thread) { 803 /* new IO requests from the ready thread */ 804 ASSERT(vdp->xdf_ready_tq_bp == NULL); 805 vdp->xdf_ready_tq_bp = bp; 806 return; 807 } 808 809 /* this is normal IO request */ 810 ASSERT(bp != vdp->xdf_ready_tq_bp); 811 812 if (vdp->xdf_f_act == NULL) { 813 /* this is only only IO on the active queue */ 814 ASSERT(vdp->xdf_l_act == NULL); 815 ASSERT(vdp->xdf_i_act == NULL); 816 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; 817 return; 818 } 819 820 /* add this IO to the tail of the active queue */ 821 vdp->xdf_l_act->av_forw = bp; 822 vdp->xdf_l_act = bp; 823 if (vdp->xdf_i_act == NULL) 824 vdp->xdf_i_act = bp; 825 } 826 827 static void 828 xdf_bp_pop(xdf_t *vdp, buf_t *bp) 829 { 830 buf_t *bp_iter; 831 832 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 833 ASSERT(VREQ_DONE(BP_VREQ(bp))); 834 835 if (vdp->xdf_ready_tq_bp == bp) { 836 /* we're done with a ready thread IO request */ 837 ASSERT(bp->av_forw == NULL); 838 vdp->xdf_ready_tq_bp = NULL; 839 return; 840 } 841 842 /* we're done with a normal IO request */ 843 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); 844 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act)); 845 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); 846 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); 847 848 if (bp == vdp->xdf_f_act) { 849 /* This IO was at the head of our active queue. */ 850 vdp->xdf_f_act = bp->av_forw; 851 if (bp == vdp->xdf_l_act) 852 vdp->xdf_l_act = NULL; 853 } else { 854 /* There IO finished before some other pending IOs. 
*/ 855 bp_iter = vdp->xdf_f_act; 856 while (bp != bp_iter->av_forw) { 857 bp_iter = bp_iter->av_forw; 858 ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); 859 ASSERT(bp_iter != vdp->xdf_i_act); 860 } 861 bp_iter->av_forw = bp->av_forw; 862 if (bp == vdp->xdf_l_act) 863 vdp->xdf_l_act = bp_iter; 864 } 865 bp->av_forw = NULL; 866 } 867 868 static buf_t * 869 xdf_bp_next(xdf_t *vdp) 870 { 871 v_req_t *vreq; 872 buf_t *bp; 873 874 if (vdp->xdf_state == XD_CONNECTED) { 875 /* 876 * If we're in the XD_CONNECTED state, we only service IOs 877 * from the xdf_ready_tq_thread thread. 878 */ 879 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 880 return (NULL); 881 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 882 return (bp); 883 return (NULL); 884 } 885 886 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 887 if (vdp->xdf_state != XD_READY) 888 return (NULL); 889 890 ASSERT(vdp->xdf_ready_tq_bp == NULL); 891 for (;;) { 892 if ((bp = vdp->xdf_i_act) == NULL) 893 return (NULL); 894 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 895 return (bp); 896 897 /* advance the active buf index pointer */ 898 vdp->xdf_i_act = bp->av_forw; 899 } 900 } 901 902 static void 903 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 904 { 905 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 906 v_req_t *vreq = gs->gs_vreq; 907 buf_t *bp = vreq->v_buf; 908 909 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 910 ASSERT(BP_VREQ(bp) == vreq); 911 912 gs_free(gs); 913 914 if (bioerr != 0) 915 bioerror(bp, bioerr); 916 ASSERT(vreq->v_nslots > 0); 917 if (--vreq->v_nslots > 0) 918 return; 919 920 /* remove this IO from our active queue */ 921 xdf_bp_pop(vdp, bp); 922 923 ASSERT(vreq->v_runq); 924 xdf_kstat_exit(vdp, bp); 925 vreq->v_runq = B_FALSE; 926 vreq_free(vdp, vreq); 927 928 if (IS_ERROR(bp)) { 929 xdf_io_err(bp, geterror(bp), 0); 930 } else if (bp->b_resid != 0) { 931 /* Partial transfers are an error */ 932 xdf_io_err(bp, EIO, bp->b_resid); 933 } else { 934 biodone(bp); 935 } 936 } 
937 938 /* 939 * xdf interrupt handler 940 */ 941 static uint_t 942 xdf_intr_locked(xdf_t *vdp) 943 { 944 xendev_ring_t *xbr; 945 blkif_response_t *resp; 946 int bioerr; 947 uint64_t id; 948 uint8_t op; 949 uint16_t status; 950 ddi_acc_handle_t acchdl; 951 952 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 953 954 if ((xbr = vdp->xdf_xb_ring) == NULL) 955 return (DDI_INTR_UNCLAIMED); 956 957 acchdl = vdp->xdf_xb_ring_hdl; 958 959 /* 960 * complete all requests which have a response 961 */ 962 while (resp = xvdi_ring_get_response(xbr)) { 963 id = ddi_get64(acchdl, &resp->id); 964 op = ddi_get8(acchdl, &resp->operation); 965 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 966 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 967 op, id, status)); 968 969 if (status != BLKIF_RSP_OKAY) { 970 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 971 vdp->xdf_addr, 972 (op == BLKIF_OP_READ) ? "reading" : "writing")); 973 bioerr = EIO; 974 } else { 975 bioerr = 0; 976 } 977 978 xdf_io_fini(vdp, id, bioerr); 979 } 980 return (DDI_INTR_CLAIMED); 981 } 982 983 /* 984 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 985 * block at a lower pil. 
986 */ 987 static uint_t 988 xdf_intr(caddr_t arg) 989 { 990 xdf_t *vdp = (xdf_t *)arg; 991 int rv; 992 993 mutex_enter(&vdp->xdf_dev_lk); 994 rv = xdf_intr_locked(vdp); 995 mutex_exit(&vdp->xdf_dev_lk); 996 997 if (!do_polled_io) 998 xdf_io_start(vdp); 999 1000 return (rv); 1001 } 1002 1003 static void 1004 xdf_ring_push(xdf_t *vdp) 1005 { 1006 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1007 1008 if (vdp->xdf_xb_ring == NULL) 1009 return; 1010 1011 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1012 DPRINTF(IO_DBG, ( 1013 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1014 vdp->xdf_addr)); 1015 } 1016 1017 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1018 xvdi_notify_oe(vdp->xdf_dip); 1019 } 1020 1021 static int 1022 xdf_ring_drain_locked(xdf_t *vdp) 1023 { 1024 int pollc, rv = 0; 1025 1026 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1027 1028 if (xdf_debug & SUSRES_DBG) 1029 xen_printf("xdf_ring_drain: start\n"); 1030 1031 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1032 if (vdp->xdf_xb_ring == NULL) 1033 goto out; 1034 1035 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1036 (void) xdf_intr_locked(vdp); 1037 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1038 goto out; 1039 xdf_ring_push(vdp); 1040 1041 /* file-backed devices can be slow */ 1042 mutex_exit(&vdp->xdf_dev_lk); 1043 #ifdef XPV_HVM_DRIVER 1044 (void) HYPERVISOR_yield(); 1045 #endif /* XPV_HVM_DRIVER */ 1046 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1047 mutex_enter(&vdp->xdf_dev_lk); 1048 } 1049 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1050 1051 out: 1052 if (vdp->xdf_xb_ring != NULL) { 1053 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1054 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1055 rv = EIO; 1056 } 1057 if (xdf_debug & SUSRES_DBG) 1058 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1059 vdp->xdf_addr, rv); 1060 return (rv); 1061 } 1062 1063 static int 1064 xdf_ring_drain(xdf_t *vdp) 1065 { 1066 int rv; 
1067 mutex_enter(&vdp->xdf_dev_lk); 1068 rv = xdf_ring_drain_locked(vdp); 1069 mutex_exit(&vdp->xdf_dev_lk); 1070 return (rv); 1071 } 1072 1073 /* 1074 * Destroy all v_req_t, grant table entries, and our ring buffer. 1075 */ 1076 static void 1077 xdf_ring_destroy(xdf_t *vdp) 1078 { 1079 v_req_t *vreq; 1080 buf_t *bp; 1081 ge_slot_t *gs; 1082 1083 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1084 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1085 1086 if ((vdp->xdf_state != XD_INIT) && 1087 (vdp->xdf_state != XD_CONNECTED) && 1088 (vdp->xdf_state != XD_READY)) { 1089 ASSERT(vdp->xdf_xb_ring == NULL); 1090 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1091 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1092 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1093 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1094 return; 1095 } 1096 1097 /* 1098 * We don't want to recieve async notifications from the backend 1099 * when it finishes processing ring entries. 1100 */ 1101 #ifdef XPV_HVM_DRIVER 1102 ec_unbind_evtchn(vdp->xdf_evtchn); 1103 #else /* !XPV_HVM_DRIVER */ 1104 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1105 #endif /* !XPV_HVM_DRIVER */ 1106 1107 /* 1108 * Drain any requests in the ring. We need to do this before we 1109 * can free grant table entries, because if active ring entries 1110 * point to grants, then the backend could be trying to access 1111 * those grants. 1112 */ 1113 (void) xdf_ring_drain_locked(vdp); 1114 1115 /* We're done talking to the backend so free up our event channel */ 1116 xvdi_free_evtchn(vdp->xdf_dip); 1117 vdp->xdf_evtchn = INVALID_EVTCHN; 1118 1119 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1120 bp = vreq->v_buf; 1121 ASSERT(BP_VREQ(bp) == vreq); 1122 1123 /* Free up any grant table entries associaed with this IO */ 1124 while ((gs = list_head(&vreq->v_gs)) != NULL) 1125 gs_free(gs); 1126 1127 /* If this IO was on the runq, move it back to the waitq. 
*/ 1128 if (vreq->v_runq) 1129 xdf_kstat_runq_to_waitq(vdp, bp); 1130 1131 /* 1132 * Reset any buf IO state since we're going to re-issue the 1133 * IO when we reconnect. 1134 */ 1135 vreq_free(vdp, vreq); 1136 BP_VREQ_SET(bp, NULL); 1137 bioerror(bp, 0); 1138 } 1139 1140 /* reset the active queue index pointer */ 1141 vdp->xdf_i_act = vdp->xdf_f_act; 1142 1143 /* Destroy the ring */ 1144 xvdi_free_ring(vdp->xdf_xb_ring); 1145 vdp->xdf_xb_ring = NULL; 1146 vdp->xdf_xb_ring_hdl = NULL; 1147 vdp->xdf_peer = INVALID_DOMID; 1148 } 1149 1150 void 1151 xdfmin(struct buf *bp) 1152 { 1153 if (bp->b_bcount > xdf_maxphys) 1154 bp->b_bcount = xdf_maxphys; 1155 } 1156 1157 /* 1158 * Check if we have a pending "eject" media request. 1159 */ 1160 static int 1161 xdf_eject_pending(xdf_t *vdp) 1162 { 1163 dev_info_t *dip = vdp->xdf_dip; 1164 char *xsname, *str; 1165 1166 if (!vdp->xdf_media_req_supported) 1167 return (B_FALSE); 1168 1169 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1170 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1171 return (B_FALSE); 1172 1173 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1174 strfree(str); 1175 return (B_FALSE); 1176 } 1177 strfree(str); 1178 return (B_TRUE); 1179 } 1180 1181 /* 1182 * Generate a media request. 1183 */ 1184 static int 1185 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1186 { 1187 dev_info_t *dip = vdp->xdf_dip; 1188 char *xsname; 1189 1190 /* 1191 * we can't be holding xdf_dev_lk because xenbus_printf() can 1192 * block while waiting for a PIL 1 interrupt message. this 1193 * would cause a deadlock with xdf_intr() which needs to grab 1194 * xdf_dev_lk as well and runs at PIL 5. 
1195 */ 1196 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1197 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1198 1199 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1200 return (ENXIO); 1201 1202 /* Check if we support media requests */ 1203 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1204 return (ENOTTY); 1205 1206 /* If an eject is pending then don't allow any new requests */ 1207 if (xdf_eject_pending(vdp)) 1208 return (ENXIO); 1209 1210 /* Make sure that there is media present */ 1211 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1212 return (ENXIO); 1213 1214 /* We only allow operations when the device is ready and connected */ 1215 if (vdp->xdf_state != XD_READY) 1216 return (EIO); 1217 1218 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1219 return (EIO); 1220 1221 return (0); 1222 } 1223 1224 /* 1225 * populate a single blkif_request_t w/ a buf 1226 */ 1227 static void 1228 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1229 { 1230 grant_ref_t gr; 1231 uint8_t fsect, lsect; 1232 size_t bcnt; 1233 paddr_t dma_addr; 1234 off_t blk_off; 1235 dev_info_t *dip = vdp->xdf_dip; 1236 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1237 v_req_t *vreq = BP_VREQ(bp); 1238 uint64_t blkno = vreq->v_blkno; 1239 uint_t ndmacs = vreq->v_ndmacs; 1240 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1241 int seg = 0; 1242 int isread = IS_READ(bp); 1243 ge_slot_t *gs = list_head(&vreq->v_gs); 1244 1245 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1246 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1247 1248 if (isread) 1249 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1250 else { 1251 switch (vreq->v_flush_diskcache) { 1252 case FLUSH_DISKCACHE: 1253 ddi_put8(acchdl, &rreq->operation, 1254 BLKIF_OP_FLUSH_DISKCACHE); 1255 ddi_put16(acchdl, &rreq->handle, vdev); 1256 ddi_put64(acchdl, &rreq->id, 1257 (uint64_t)(uintptr_t)(gs)); 1258 ddi_put8(acchdl, &rreq->nr_segments, 0); 1259 vreq->v_status = VREQ_DMAWIN_DONE; 1260 return; 1261 case 
WRITE_BARRIER: 1262 ddi_put8(acchdl, &rreq->operation, 1263 BLKIF_OP_WRITE_BARRIER); 1264 break; 1265 default: 1266 if (!vdp->xdf_wce) 1267 ddi_put8(acchdl, &rreq->operation, 1268 BLKIF_OP_WRITE_BARRIER); 1269 else 1270 ddi_put8(acchdl, &rreq->operation, 1271 BLKIF_OP_WRITE); 1272 break; 1273 } 1274 } 1275 1276 ddi_put16(acchdl, &rreq->handle, vdev); 1277 ddi_put64(acchdl, &rreq->sector_number, blkno); 1278 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1279 1280 /* 1281 * loop until all segments are populated or no more dma cookie in buf 1282 */ 1283 for (;;) { 1284 /* 1285 * Each segment of a blkif request can transfer up to 1286 * one 4K page of data. 1287 */ 1288 bcnt = vreq->v_dmac.dmac_size; 1289 dma_addr = vreq->v_dmac.dmac_laddress; 1290 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1291 fsect = blk_off >> XB_BSHIFT; 1292 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1293 1294 ASSERT(bcnt <= PAGESIZE); 1295 ASSERT((bcnt % XB_BSIZE) == 0); 1296 ASSERT((blk_off & XB_BMASK) == 0); 1297 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1298 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1299 1300 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1301 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1302 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1303 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1304 1305 DPRINTF(IO_DBG, ( 1306 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1307 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1308 DPRINTF(IO_DBG, ( 1309 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1310 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1311 1312 blkno += (bcnt >> XB_BSHIFT); 1313 seg++; 1314 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1315 if (--ndmacs) { 1316 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1317 continue; 1318 } 1319 1320 vreq->v_status = VREQ_DMAWIN_DONE; 1321 vreq->v_blkno = blkno; 1322 break; 1323 } 1324 ddi_put8(acchdl, &rreq->nr_segments, seg); 1325 DPRINTF(IO_DBG, ( 1326 "xdf@%s: xdf_process_rreq: 
request id=%"PRIx64" ready\n", 1327 vdp->xdf_addr, rreq->id)); 1328 } 1329 1330 static void 1331 xdf_io_start(xdf_t *vdp) 1332 { 1333 struct buf *bp; 1334 v_req_t *vreq; 1335 blkif_request_t *rreq; 1336 boolean_t rreqready = B_FALSE; 1337 1338 mutex_enter(&vdp->xdf_dev_lk); 1339 1340 /* 1341 * Populate the ring request(s). Loop until there is no buf to 1342 * transfer or no free slot available in I/O ring. 1343 */ 1344 for (;;) { 1345 /* don't start any new IO if we're suspending */ 1346 if (vdp->xdf_suspending) 1347 break; 1348 if ((bp = xdf_bp_next(vdp)) == NULL) 1349 break; 1350 1351 /* if the buf doesn't already have a vreq, allocate one */ 1352 if (((vreq = BP_VREQ(bp)) == NULL) && 1353 ((vreq = vreq_get(vdp, bp)) == NULL)) 1354 break; 1355 1356 /* alloc DMA/GTE resources */ 1357 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1358 break; 1359 1360 /* get next blkif_request in the ring */ 1361 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1362 break; 1363 bzero(rreq, sizeof (blkif_request_t)); 1364 rreqready = B_TRUE; 1365 1366 /* populate blkif_request with this buf */ 1367 xdf_process_rreq(vdp, bp, rreq); 1368 1369 /* 1370 * This buffer/vreq pair is has been allocated a ring buffer 1371 * resources, so if it isn't already in our runq, add it. 
1372 */ 1373 if (!vreq->v_runq) 1374 xdf_kstat_waitq_to_runq(vdp, bp); 1375 } 1376 1377 /* Send the request(s) to the backend */ 1378 if (rreqready) 1379 xdf_ring_push(vdp); 1380 1381 mutex_exit(&vdp->xdf_dev_lk); 1382 } 1383 1384 1385 /* check if partition is open, -1 - check all partitions on the disk */ 1386 static boolean_t 1387 xdf_isopen(xdf_t *vdp, int partition) 1388 { 1389 int i; 1390 ulong_t parbit; 1391 boolean_t rval = B_FALSE; 1392 1393 ASSERT((partition == -1) || 1394 ((partition >= 0) || (partition < XDF_PEXT))); 1395 1396 if (partition == -1) 1397 parbit = (ulong_t)-1; 1398 else 1399 parbit = 1 << partition; 1400 1401 for (i = 0; i < OTYPCNT; i++) { 1402 if (vdp->xdf_vd_open[i] & parbit) 1403 rval = B_TRUE; 1404 } 1405 1406 return (rval); 1407 } 1408 1409 /* 1410 * The connection should never be closed as long as someone is holding 1411 * us open, there is pending IO, or someone is waiting waiting for a 1412 * connection. 1413 */ 1414 static boolean_t 1415 xdf_busy(xdf_t *vdp) 1416 { 1417 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1418 1419 if ((vdp->xdf_xb_ring != NULL) && 1420 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1421 ASSERT(vdp->xdf_state != XD_CLOSED); 1422 return (B_TRUE); 1423 } 1424 1425 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1426 ASSERT(vdp->xdf_state != XD_CLOSED); 1427 return (B_TRUE); 1428 } 1429 1430 if (xdf_isopen(vdp, -1)) { 1431 ASSERT(vdp->xdf_state != XD_CLOSED); 1432 return (B_TRUE); 1433 } 1434 1435 if (vdp->xdf_connect_req > 0) { 1436 ASSERT(vdp->xdf_state != XD_CLOSED); 1437 return (B_TRUE); 1438 } 1439 1440 return (B_FALSE); 1441 } 1442 1443 static void 1444 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1445 { 1446 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1447 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1448 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1449 vdp->xdf_addr, vdp->xdf_state, new_state)); 1450 vdp->xdf_state = new_state; 1451 cv_broadcast(&vdp->xdf_dev_cv); 1452 } 1453 
1454 static void 1455 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1456 { 1457 dev_info_t *dip = vdp->xdf_dip; 1458 boolean_t busy; 1459 1460 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1461 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1462 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1463 1464 /* Check if we're already there. */ 1465 if (vdp->xdf_state == new_state) 1466 return; 1467 1468 mutex_enter(&vdp->xdf_dev_lk); 1469 busy = xdf_busy(vdp); 1470 1471 /* If we're already closed then there's nothing todo. */ 1472 if (vdp->xdf_state == XD_CLOSED) { 1473 ASSERT(!busy); 1474 xdf_set_state(vdp, new_state); 1475 mutex_exit(&vdp->xdf_dev_lk); 1476 return; 1477 } 1478 1479 #ifdef DEBUG 1480 /* UhOh. Warn the user that something bad has happened. */ 1481 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1482 (vdp->xdf_xdev_nblocks != 0)) { 1483 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1484 vdp->xdf_addr); 1485 } 1486 #endif /* DEBUG */ 1487 1488 xdf_ring_destroy(vdp); 1489 1490 /* If we're busy then we can only go into the unknown state */ 1491 xdf_set_state(vdp, (busy) ? 
XD_UNKNOWN : new_state); 1492 mutex_exit(&vdp->xdf_dev_lk); 1493 1494 /* if we're closed now, let the other end know */ 1495 if (vdp->xdf_state == XD_CLOSED) 1496 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1497 } 1498 1499 1500 /* 1501 * Kick-off connect process 1502 * Status should be XD_UNKNOWN or XD_CLOSED 1503 * On success, status will be changed to XD_INIT 1504 * On error, it will be changed to XD_UNKNOWN 1505 */ 1506 static int 1507 xdf_setstate_init(xdf_t *vdp) 1508 { 1509 dev_info_t *dip = vdp->xdf_dip; 1510 xenbus_transaction_t xbt; 1511 grant_ref_t gref; 1512 char *xsname, *str; 1513 int rv; 1514 1515 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1516 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1517 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1518 (vdp->xdf_state == XD_CLOSED)); 1519 1520 DPRINTF(DDI_DBG, 1521 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1522 1523 /* 1524 * If an eject is pending then don't allow a new connection. 1525 * (Only the backend can clear media request eject request.) 1526 */ 1527 if (xdf_eject_pending(vdp)) 1528 return (DDI_FAILURE); 1529 1530 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1531 goto errout; 1532 1533 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1534 goto errout; 1535 1536 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1537 1538 /* 1539 * Sanity check for the existance of the xenbus device-type property. 1540 * This property might not exist if we our xenbus device nodes was 1541 * force destroyed while we were still connected to the backend. 
1542 */ 1543 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1544 goto errout; 1545 strfree(str); 1546 1547 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1548 goto errout; 1549 1550 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1551 #ifdef XPV_HVM_DRIVER 1552 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1553 #else /* !XPV_HVM_DRIVER */ 1554 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1555 DDI_SUCCESS) { 1556 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1557 "failed to add intr handler", vdp->xdf_addr); 1558 goto errout1; 1559 } 1560 #endif /* !XPV_HVM_DRIVER */ 1561 1562 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1563 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1564 DDI_SUCCESS) { 1565 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1566 vdp->xdf_addr); 1567 goto errout2; 1568 } 1569 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1570 1571 /* 1572 * Write into xenstore the info needed by backend 1573 */ 1574 trans_retry: 1575 if (xenbus_transaction_start(&xbt)) { 1576 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1577 vdp->xdf_addr); 1578 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1579 goto fail_trans; 1580 } 1581 1582 /* 1583 * XBP_PROTOCOL is written by the domain builder in the case of PV 1584 * domains. However, it is not written for HVM domains, so let's 1585 * write it here. 
1586 */ 1587 if (((rv = xenbus_printf(xbt, xsname, 1588 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1589 ((rv = xenbus_printf(xbt, xsname, 1590 XBP_RING_REF, "%u", gref)) != 0) || 1591 ((rv = xenbus_printf(xbt, xsname, 1592 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1593 ((rv = xenbus_printf(xbt, xsname, 1594 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1595 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1596 (void) xenbus_transaction_end(xbt, 1); 1597 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1598 goto fail_trans; 1599 } 1600 1601 /* kick-off connect process */ 1602 if (rv = xenbus_transaction_end(xbt, 0)) { 1603 if (rv == EAGAIN) 1604 goto trans_retry; 1605 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1606 goto fail_trans; 1607 } 1608 1609 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1610 mutex_enter(&vdp->xdf_dev_lk); 1611 xdf_set_state(vdp, XD_INIT); 1612 mutex_exit(&vdp->xdf_dev_lk); 1613 1614 return (DDI_SUCCESS); 1615 1616 fail_trans: 1617 xvdi_free_ring(vdp->xdf_xb_ring); 1618 errout2: 1619 #ifdef XPV_HVM_DRIVER 1620 ec_unbind_evtchn(vdp->xdf_evtchn); 1621 #else /* !XPV_HVM_DRIVER */ 1622 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1623 #endif /* !XPV_HVM_DRIVER */ 1624 errout1: 1625 xvdi_free_evtchn(dip); 1626 vdp->xdf_evtchn = INVALID_EVTCHN; 1627 errout: 1628 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1629 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1630 vdp->xdf_addr); 1631 return (DDI_FAILURE); 1632 } 1633 1634 int 1635 xdf_get_flush_block(xdf_t *vdp) 1636 { 1637 /* 1638 * Get a DEV_BSIZE aligned bufer 1639 */ 1640 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1641 vdp->xdf_cache_flush_block = 1642 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1643 (int)vdp->xdf_xdev_secsize); 1644 1645 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1646 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1647 return (DDI_FAILURE); 1648 
return (DDI_SUCCESS); 1649 } 1650 1651 static void 1652 xdf_setstate_ready(void *arg) 1653 { 1654 xdf_t *vdp = (xdf_t *)arg; 1655 1656 vdp->xdf_ready_tq_thread = curthread; 1657 1658 /* 1659 * We've created all the minor nodes via cmlb_attach() using default 1660 * value in xdf_attach() to make it possible to block in xdf_open(), 1661 * in case there's anyone (say, booting thread) ever trying to open 1662 * it before connected to backend. We will refresh all those minor 1663 * nodes w/ latest info we've got now when we are almost connected. 1664 */ 1665 mutex_enter(&vdp->xdf_dev_lk); 1666 if (vdp->xdf_cmbl_reattach) { 1667 vdp->xdf_cmbl_reattach = B_FALSE; 1668 1669 mutex_exit(&vdp->xdf_dev_lk); 1670 if (xdf_cmlb_attach(vdp) != 0) { 1671 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1672 return; 1673 } 1674 mutex_enter(&vdp->xdf_dev_lk); 1675 } 1676 1677 /* If we're not still trying to get to the ready state, then bail. */ 1678 if (vdp->xdf_state != XD_CONNECTED) { 1679 mutex_exit(&vdp->xdf_dev_lk); 1680 return; 1681 } 1682 mutex_exit(&vdp->xdf_dev_lk); 1683 1684 /* 1685 * If backend has feature-barrier, see if it supports disk 1686 * cache flush op. 1687 */ 1688 vdp->xdf_flush_supported = B_FALSE; 1689 if (vdp->xdf_feature_barrier) { 1690 /* 1691 * Pretend we already know flush is supported so probe 1692 * will attempt the correct op. 1693 */ 1694 vdp->xdf_flush_supported = B_TRUE; 1695 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1696 vdp->xdf_flush_supported = B_TRUE; 1697 } else { 1698 vdp->xdf_flush_supported = B_FALSE; 1699 /* 1700 * If the other end does not support the cache flush op 1701 * then we must use a barrier-write to force disk 1702 * cache flushing. Barrier writes require that a data 1703 * block actually be written. 1704 * Cache a block to barrier-write when we are 1705 * asked to perform a flush. 1706 * XXX - would it be better to just copy 1 block 1707 * (512 bytes) from whatever write we did last 1708 * and rewrite that block? 
1709 */ 1710 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1711 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1712 return; 1713 } 1714 } 1715 } 1716 1717 mutex_enter(&vdp->xdf_cb_lk); 1718 mutex_enter(&vdp->xdf_dev_lk); 1719 if (vdp->xdf_state == XD_CONNECTED) 1720 xdf_set_state(vdp, XD_READY); 1721 mutex_exit(&vdp->xdf_dev_lk); 1722 1723 /* Restart any currently queued up io */ 1724 xdf_io_start(vdp); 1725 1726 mutex_exit(&vdp->xdf_cb_lk); 1727 } 1728 1729 /* 1730 * synthetic geometry 1731 */ 1732 #define XDF_NSECTS 256 1733 #define XDF_NHEADS 16 1734 1735 static void 1736 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1737 { 1738 xdf_t *vdp; 1739 uint_t ncyl; 1740 1741 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1742 1743 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1744 1745 bzero(geomp, sizeof (*geomp)); 1746 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1747 geomp->g_acyl = 0; 1748 geomp->g_nhead = XDF_NHEADS; 1749 geomp->g_nsect = XDF_NSECTS; 1750 geomp->g_secsize = vdp->xdf_xdev_secsize; 1751 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1752 geomp->g_intrlv = 0; 1753 geomp->g_rpm = 7200; 1754 } 1755 1756 /* 1757 * Finish other initialization after we've connected to backend 1758 * Status should be XD_INIT before calling this routine 1759 * On success, status should be changed to XD_CONNECTED. 
1760 * On error, status should stay XD_INIT 1761 */ 1762 static int 1763 xdf_setstate_connected(xdf_t *vdp) 1764 { 1765 dev_info_t *dip = vdp->xdf_dip; 1766 cmlb_geom_t pgeom; 1767 diskaddr_t nblocks = 0; 1768 uint_t secsize = 0; 1769 char *oename, *xsname, *str; 1770 uint_t dinfo; 1771 1772 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1773 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1774 ASSERT(vdp->xdf_state == XD_INIT); 1775 1776 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1777 ((oename = xvdi_get_oename(dip)) == NULL)) 1778 return (DDI_FAILURE); 1779 1780 /* Make sure the other end is XenbusStateConnected */ 1781 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1782 return (DDI_FAILURE); 1783 1784 /* Determine if feature barrier is supported by backend */ 1785 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1786 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1787 vdp->xdf_addr); 1788 1789 /* 1790 * Probe backend. Read the device size into xdf_xdev_nblocks 1791 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1792 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1793 * we always set VDISK_CDROM, regardless of if it's present in 1794 * the xenbus info parameter. 
1795 */ 1796 if (xenbus_gather(XBT_NULL, oename, 1797 XBP_SECTORS, "%"SCNu64, &nblocks, 1798 XBP_SECTOR_SIZE, "%u", &secsize, 1799 XBP_INFO, "%u", &dinfo, 1800 NULL) != 0) { 1801 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1802 "cannot read backend info", vdp->xdf_addr); 1803 return (DDI_FAILURE); 1804 } 1805 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1806 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1807 vdp->xdf_addr); 1808 return (DDI_FAILURE); 1809 } 1810 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1811 dinfo |= VDISK_CDROM; 1812 strfree(str); 1813 1814 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1815 secsize = DEV_BSIZE; 1816 vdp->xdf_xdev_nblocks = nblocks; 1817 vdp->xdf_xdev_secsize = secsize; 1818 #ifdef _ILP32 1819 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1820 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1821 "backend disk device too large with %llu blocks for" 1822 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1823 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1824 return (DDI_FAILURE); 1825 } 1826 #endif 1827 1828 /* 1829 * If the physical geometry for a fixed disk has been explicity 1830 * set then make sure that the specified physical geometry isn't 1831 * larger than the device we connected to. 
1832 */ 1833 if (vdp->xdf_pgeom_fixed && 1834 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1835 cmn_err(CE_WARN, 1836 "xdf@%s: connect failed, fixed geometry too large", 1837 vdp->xdf_addr); 1838 return (DDI_FAILURE); 1839 } 1840 1841 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1842 1843 /* mark vbd is ready for I/O */ 1844 mutex_enter(&vdp->xdf_dev_lk); 1845 xdf_set_state(vdp, XD_CONNECTED); 1846 1847 /* check if the cmlb label should be updated */ 1848 xdf_synthetic_pgeom(dip, &pgeom); 1849 if ((vdp->xdf_dinfo != dinfo) || 1850 (!vdp->xdf_pgeom_fixed && 1851 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1852 vdp->xdf_cmbl_reattach = B_TRUE; 1853 1854 vdp->xdf_dinfo = dinfo; 1855 if (!vdp->xdf_pgeom_fixed) 1856 vdp->xdf_pgeom = pgeom; 1857 } 1858 1859 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1860 if (vdp->xdf_xdev_nblocks == 0) { 1861 vdp->xdf_mstate = DKIO_EJECTED; 1862 cv_broadcast(&vdp->xdf_mstate_cv); 1863 } else { 1864 vdp->xdf_mstate = DKIO_INSERTED; 1865 cv_broadcast(&vdp->xdf_mstate_cv); 1866 } 1867 } else { 1868 if (vdp->xdf_mstate != DKIO_NONE) { 1869 vdp->xdf_mstate = DKIO_NONE; 1870 cv_broadcast(&vdp->xdf_mstate_cv); 1871 } 1872 } 1873 1874 mutex_exit(&vdp->xdf_dev_lk); 1875 1876 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1877 (uint64_t)vdp->xdf_xdev_nblocks); 1878 1879 /* Restart any currently queued up io */ 1880 xdf_io_start(vdp); 1881 1882 /* 1883 * To get to the ready state we have to do IO to the backend device, 1884 * but we can't initiate IO from the other end change callback thread 1885 * (which is the current context we're executing in.) This is because 1886 * if the other end disconnects while we're doing IO from the callback 1887 * thread, then we can't recieve that disconnect event and we hang 1888 * waiting for an IO that can never complete. 
1889 */ 1890 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1891 DDI_SLEEP); 1892 1893 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1894 return (DDI_SUCCESS); 1895 } 1896 1897 /*ARGSUSED*/ 1898 static void 1899 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1900 { 1901 XenbusState new_state = *(XenbusState *)impl_data; 1902 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1903 1904 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1905 vdp->xdf_addr, new_state)); 1906 1907 mutex_enter(&vdp->xdf_cb_lk); 1908 1909 /* We assume that this callback is single threaded */ 1910 ASSERT(vdp->xdf_oe_change_thread == NULL); 1911 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1912 1913 /* ignore any backend state changes if we're suspending/suspended */ 1914 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1915 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1916 mutex_exit(&vdp->xdf_cb_lk); 1917 return; 1918 } 1919 1920 switch (new_state) { 1921 case XenbusStateUnknown: 1922 case XenbusStateInitialising: 1923 case XenbusStateInitWait: 1924 case XenbusStateInitialised: 1925 if (vdp->xdf_state == XD_INIT) 1926 break; 1927 1928 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1929 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1930 break; 1931 ASSERT(vdp->xdf_state == XD_INIT); 1932 break; 1933 1934 case XenbusStateConnected: 1935 if ((vdp->xdf_state == XD_CONNECTED) || 1936 (vdp->xdf_state == XD_READY)) 1937 break; 1938 1939 if (vdp->xdf_state != XD_INIT) { 1940 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1941 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1942 break; 1943 ASSERT(vdp->xdf_state == XD_INIT); 1944 } 1945 1946 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1947 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1948 break; 1949 } 1950 ASSERT(vdp->xdf_state == XD_CONNECTED); 1951 break; 1952 1953 case XenbusStateClosing: 1954 if (xdf_isopen(vdp, -1)) { 1955 cmn_err(CE_NOTE, 1956 "xdf@%s: 
hot-unplug failed, still in use", 1957 vdp->xdf_addr); 1958 break; 1959 } 1960 /*FALLTHROUGH*/ 1961 case XenbusStateClosed: 1962 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1963 break; 1964 } 1965 1966 /* notify anybody waiting for oe state change */ 1967 cv_broadcast(&vdp->xdf_dev_cv); 1968 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1969 mutex_exit(&vdp->xdf_cb_lk); 1970 } 1971 1972 static int 1973 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1974 { 1975 int rv, timeouts = 0, reset = 20; 1976 1977 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1978 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1979 1980 /* we can't connect once we're in the closed state */ 1981 if (vdp->xdf_state == XD_CLOSED) 1982 return (XD_CLOSED); 1983 1984 vdp->xdf_connect_req++; 1985 while (vdp->xdf_state != XD_READY) { 1986 mutex_exit(&vdp->xdf_dev_lk); 1987 1988 /* only one thread at a time can be the connection thread */ 1989 if (vdp->xdf_connect_thread == NULL) 1990 vdp->xdf_connect_thread = curthread; 1991 1992 if (vdp->xdf_connect_thread == curthread) { 1993 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 1994 /* 1995 * If we haven't establised a connection 1996 * within the reset time, then disconnect 1997 * so we can try again, and double the reset 1998 * time. The reset time starts at 2 sec. 
1999 */ 2000 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2001 reset *= 2; 2002 } 2003 if (vdp->xdf_state == XD_UNKNOWN) 2004 (void) xdf_setstate_init(vdp); 2005 if (vdp->xdf_state == XD_INIT) 2006 (void) xdf_setstate_connected(vdp); 2007 } 2008 2009 mutex_enter(&vdp->xdf_dev_lk); 2010 if (!wait || (vdp->xdf_state == XD_READY)) 2011 goto out; 2012 2013 mutex_exit((&vdp->xdf_cb_lk)); 2014 if (vdp->xdf_connect_thread != curthread) { 2015 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2016 } else { 2017 /* delay for 0.1 sec */ 2018 rv = cv_timedwait_sig(&vdp->xdf_dev_cv, 2019 &vdp->xdf_dev_lk, lbolt + drv_usectohz(100*1000)); 2020 if (rv == -1) 2021 timeouts++; 2022 } 2023 mutex_exit((&vdp->xdf_dev_lk)); 2024 mutex_enter((&vdp->xdf_cb_lk)); 2025 mutex_enter((&vdp->xdf_dev_lk)); 2026 if (rv == 0) 2027 goto out; 2028 } 2029 2030 out: 2031 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2032 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2033 2034 if (vdp->xdf_connect_thread == curthread) { 2035 /* 2036 * wake up someone else so they can become the connection 2037 * thread. 
2038 */ 2039 cv_signal(&vdp->xdf_dev_cv); 2040 vdp->xdf_connect_thread = NULL; 2041 } 2042 2043 /* Try to lock the media */ 2044 mutex_exit((&vdp->xdf_dev_lk)); 2045 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2046 mutex_enter((&vdp->xdf_dev_lk)); 2047 2048 vdp->xdf_connect_req--; 2049 return (vdp->xdf_state); 2050 } 2051 2052 static uint_t 2053 xdf_iorestart(caddr_t arg) 2054 { 2055 xdf_t *vdp = (xdf_t *)arg; 2056 2057 ASSERT(vdp != NULL); 2058 2059 mutex_enter(&vdp->xdf_dev_lk); 2060 ASSERT(ISDMACBON(vdp)); 2061 SETDMACBOFF(vdp); 2062 mutex_exit(&vdp->xdf_dev_lk); 2063 2064 xdf_io_start(vdp); 2065 2066 return (DDI_INTR_CLAIMED); 2067 } 2068 2069 #if defined(XPV_HVM_DRIVER) 2070 2071 typedef struct xdf_hvm_entry { 2072 list_node_t xdf_he_list; 2073 char *xdf_he_path; 2074 dev_info_t *xdf_he_dip; 2075 } xdf_hvm_entry_t; 2076 2077 static list_t xdf_hvm_list; 2078 static kmutex_t xdf_hvm_list_lock; 2079 2080 static xdf_hvm_entry_t * 2081 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2082 { 2083 xdf_hvm_entry_t *i; 2084 2085 ASSERT((path != NULL) || (dip != NULL)); 2086 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2087 2088 i = list_head(&xdf_hvm_list); 2089 while (i != NULL) { 2090 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2091 i = list_next(&xdf_hvm_list, i); 2092 continue; 2093 } 2094 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2095 i = list_next(&xdf_hvm_list, i); 2096 continue; 2097 } 2098 break; 2099 } 2100 return (i); 2101 } 2102 2103 dev_info_t * 2104 xdf_hvm_hold(const char *path) 2105 { 2106 xdf_hvm_entry_t *i; 2107 dev_info_t *dip; 2108 2109 mutex_enter(&xdf_hvm_list_lock); 2110 i = i_xdf_hvm_find(path, NULL); 2111 if (i == NULL) { 2112 mutex_exit(&xdf_hvm_list_lock); 2113 return (B_FALSE); 2114 } 2115 ndi_hold_devi(dip = i->xdf_he_dip); 2116 mutex_exit(&xdf_hvm_list_lock); 2117 return (dip); 2118 } 2119 2120 static void 2121 xdf_hvm_add(dev_info_t *dip) 2122 { 2123 xdf_hvm_entry_t *i; 2124 char *path; 2125 2126 /* figure out 
the path for the dip */ 2127 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2128 (void) ddi_pathname(dip, path); 2129 2130 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2131 i->xdf_he_dip = dip; 2132 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2133 2134 mutex_enter(&xdf_hvm_list_lock); 2135 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2136 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2137 list_insert_head(&xdf_hvm_list, i); 2138 mutex_exit(&xdf_hvm_list_lock); 2139 2140 kmem_free(path, MAXPATHLEN); 2141 } 2142 2143 static void 2144 xdf_hvm_rm(dev_info_t *dip) 2145 { 2146 xdf_hvm_entry_t *i; 2147 2148 mutex_enter(&xdf_hvm_list_lock); 2149 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2150 list_remove(&xdf_hvm_list, i); 2151 mutex_exit(&xdf_hvm_list_lock); 2152 2153 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2154 kmem_free(i, sizeof (*i)); 2155 } 2156 2157 static void 2158 xdf_hvm_init(void) 2159 { 2160 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2161 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2162 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2163 } 2164 2165 static void 2166 xdf_hvm_fini(void) 2167 { 2168 ASSERT(list_head(&xdf_hvm_list) == NULL); 2169 list_destroy(&xdf_hvm_list); 2170 mutex_destroy(&xdf_hvm_list_lock); 2171 } 2172 2173 boolean_t 2174 xdf_hvm_connect(dev_info_t *dip) 2175 { 2176 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2177 char *oename, *str; 2178 int rv; 2179 2180 mutex_enter(&vdp->xdf_cb_lk); 2181 2182 /* 2183 * Before try to establish a connection we need to wait for the 2184 * backend hotplug scripts to have run. Once they are run the 2185 * "<oename>/hotplug-status" property will be set to "connected". 2186 */ 2187 for (;;) { 2188 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2189 2190 /* 2191 * Get the xenbus path to the backend device. Note that 2192 * we can't cache this path (and we look it up on each pass 2193 * through this loop) because it could change during 2194 * suspend, resume, and migration operations. 
2195 */ 2196 if ((oename = xvdi_get_oename(dip)) == NULL) { 2197 mutex_exit(&vdp->xdf_cb_lk); 2198 return (B_FALSE); 2199 } 2200 2201 str = NULL; 2202 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2203 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2204 break; 2205 2206 if (str != NULL) 2207 strfree(str); 2208 2209 /* wait for an update to "<oename>/hotplug-status" */ 2210 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2211 /* we got interrupted by a signal */ 2212 mutex_exit(&vdp->xdf_cb_lk); 2213 return (B_FALSE); 2214 } 2215 } 2216 2217 /* Good news. The backend hotplug scripts have been run. */ 2218 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2219 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2220 strfree(str); 2221 2222 /* 2223 * If we're emulating a cd device and if the backend doesn't support 2224 * media request opreations, then we're not going to bother trying 2225 * to establish a connection for a couple reasons. First off, media 2226 * requests support is required to support operations like eject and 2227 * media locking. Second, other backend platforms like Linux don't 2228 * support hvm pv cdrom access. They don't even have a backend pv 2229 * driver for cdrom device nodes, so we don't want to block forever 2230 * waiting for a connection to a backend driver that doesn't exist. 2231 */ 2232 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2233 mutex_exit(&vdp->xdf_cb_lk); 2234 return (B_FALSE); 2235 } 2236 2237 mutex_enter(&vdp->xdf_dev_lk); 2238 rv = xdf_connect_locked(vdp, B_TRUE); 2239 mutex_exit(&vdp->xdf_dev_lk); 2240 mutex_exit(&vdp->xdf_cb_lk); 2241 2242 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2243 } 2244 2245 int 2246 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2247 { 2248 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2249 2250 /* sanity check the requested physical geometry */ 2251 mutex_enter(&vdp->xdf_dev_lk); 2252 if ((geomp->g_secsize != XB_BSIZE) || 2253 (geomp->g_capacity == 0)) { 2254 mutex_exit(&vdp->xdf_dev_lk); 2255 return (EINVAL); 2256 } 2257 2258 /* 2259 * If we've already connected to the backend device then make sure 2260 * we're not defining a physical geometry larger than our backend 2261 * device. 2262 */ 2263 if ((vdp->xdf_xdev_nblocks != 0) && 2264 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2265 mutex_exit(&vdp->xdf_dev_lk); 2266 return (EINVAL); 2267 } 2268 2269 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2270 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2271 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2272 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2273 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2274 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2275 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2276 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2277 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2278 2279 vdp->xdf_pgeom_fixed = B_TRUE; 2280 mutex_exit(&vdp->xdf_dev_lk); 2281 2282 /* force a re-validation */ 2283 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2284 2285 return (0); 2286 } 2287 2288 boolean_t 2289 xdf_is_cd(dev_info_t *dip) 2290 { 2291 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2292 boolean_t rv; 2293 2294 mutex_enter(&vdp->xdf_cb_lk); 2295 rv = XD_IS_CD(vdp); 2296 mutex_exit(&vdp->xdf_cb_lk); 2297 return (rv); 2298 } 2299 2300 boolean_t 2301 xdf_is_rm(dev_info_t *dip) 2302 { 2303 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2304 boolean_t rv; 2305 2306 mutex_enter(&vdp->xdf_cb_lk); 2307 rv = XD_IS_RM(vdp); 2308 mutex_exit(&vdp->xdf_cb_lk); 2309 return (rv); 2310 } 2311 2312 boolean_t 2313 xdf_media_req_supported(dev_info_t *dip) 2314 { 2315 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 
2316 boolean_t rv; 2317 2318 mutex_enter(&vdp->xdf_cb_lk); 2319 rv = vdp->xdf_media_req_supported; 2320 mutex_exit(&vdp->xdf_cb_lk); 2321 return (rv); 2322 } 2323 2324 #endif /* XPV_HVM_DRIVER */ 2325 2326 static int 2327 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2328 { 2329 xdf_t *vdp; 2330 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2331 2332 if (vdp == NULL) 2333 return (ENXIO); 2334 2335 mutex_enter(&vdp->xdf_dev_lk); 2336 *capp = vdp->xdf_pgeom.g_capacity; 2337 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2338 mutex_exit(&vdp->xdf_dev_lk); 2339 return (0); 2340 } 2341 2342 static int 2343 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2344 { 2345 xdf_t *vdp; 2346 2347 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2348 return (ENXIO); 2349 *geomp = vdp->xdf_pgeom; 2350 return (0); 2351 } 2352 2353 /* 2354 * No real HBA, no geometry available from it 2355 */ 2356 /*ARGSUSED*/ 2357 static int 2358 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2359 { 2360 return (EINVAL); 2361 } 2362 2363 static int 2364 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2365 { 2366 xdf_t *vdp; 2367 2368 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2369 return (ENXIO); 2370 2371 if (XD_IS_RO(vdp)) 2372 tgattributep->media_is_writable = 0; 2373 else 2374 tgattributep->media_is_writable = 1; 2375 return (0); 2376 } 2377 2378 /* ARGSUSED3 */ 2379 int 2380 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2381 { 2382 int instance; 2383 xdf_t *vdp; 2384 2385 instance = ddi_get_instance(dip); 2386 2387 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 2388 return (ENXIO); 2389 2390 switch (cmd) { 2391 case TG_GETPHYGEOM: 2392 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2393 case TG_GETVIRTGEOM: 2394 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2395 case TG_GETCAPACITY: 2396 return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2397 
case TG_GETBLOCKSIZE: 2398 mutex_enter(&vdp->xdf_cb_lk); 2399 *(uint32_t *)arg = vdp->xdf_xdev_secsize; 2400 mutex_exit(&vdp->xdf_cb_lk); 2401 return (0); 2402 case TG_GETATTR: 2403 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2404 default: 2405 return (ENOTTY); 2406 } 2407 } 2408 2409 /* ARGSUSED5 */ 2410 int 2411 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2412 diskaddr_t start, size_t reqlen, void *tg_cookie) 2413 { 2414 xdf_t *vdp; 2415 struct buf *bp; 2416 int err = 0; 2417 2418 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2419 2420 /* We don't allow IO from the oe_change callback thread */ 2421 ASSERT(curthread != vdp->xdf_oe_change_thread); 2422 2423 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) 2424 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2425 return (EINVAL); 2426 2427 bp = getrbuf(KM_SLEEP); 2428 if (cmd == TG_READ) 2429 bp->b_flags = B_BUSY | B_READ; 2430 else 2431 bp->b_flags = B_BUSY | B_WRITE; 2432 2433 bp->b_un.b_addr = bufp; 2434 bp->b_bcount = reqlen; 2435 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); 2436 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2437 2438 mutex_enter(&vdp->xdf_dev_lk); 2439 xdf_bp_push(vdp, bp); 2440 mutex_exit(&vdp->xdf_dev_lk); 2441 xdf_io_start(vdp); 2442 if (curthread == vdp->xdf_ready_tq_thread) 2443 (void) xdf_ring_drain(vdp); 2444 err = biowait(bp); 2445 ASSERT(bp->b_flags & B_DONE); 2446 freerbuf(bp); 2447 return (err); 2448 } 2449 2450 /* 2451 * Lock the current media. Set the media state to "lock". 2452 * (Media locks are only respected by the backend driver.) 2453 */ 2454 static int 2455 xdf_ioctl_mlock(xdf_t *vdp) 2456 { 2457 int rv; 2458 mutex_enter(&vdp->xdf_cb_lk); 2459 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2460 mutex_exit(&vdp->xdf_cb_lk); 2461 return (rv); 2462 } 2463 2464 /* 2465 * Release a media lock. Set the media state to "none". 
2466 */ 2467 static int 2468 xdf_ioctl_munlock(xdf_t *vdp) 2469 { 2470 int rv; 2471 mutex_enter(&vdp->xdf_cb_lk); 2472 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2473 mutex_exit(&vdp->xdf_cb_lk); 2474 return (rv); 2475 } 2476 2477 /* 2478 * Eject the current media. Ignores any media locks. (Media locks 2479 * are only for benifit of the the backend.) 2480 */ 2481 static int 2482 xdf_ioctl_eject(xdf_t *vdp) 2483 { 2484 int rv; 2485 2486 mutex_enter(&vdp->xdf_cb_lk); 2487 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2488 mutex_exit(&vdp->xdf_cb_lk); 2489 return (rv); 2490 } 2491 2492 /* 2493 * We've set the media requests xenbus parameter to eject, so now 2494 * disconnect from the backend, wait for the backend to clear 2495 * the media requets xenbus paramter, and then we can reconnect 2496 * to the backend. 2497 */ 2498 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2499 mutex_enter(&vdp->xdf_dev_lk); 2500 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2501 mutex_exit(&vdp->xdf_dev_lk); 2502 mutex_exit(&vdp->xdf_cb_lk); 2503 return (EIO); 2504 } 2505 mutex_exit(&vdp->xdf_dev_lk); 2506 mutex_exit(&vdp->xdf_cb_lk); 2507 return (0); 2508 } 2509 2510 /* 2511 * Watch for media state changes. This can be an insertion of a device 2512 * (triggered by a 'xm block-configure' request in another domain) or 2513 * the ejection of a device (triggered by a local "eject" operation). 2514 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 
2515 */ 2516 static int 2517 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2518 { 2519 enum dkio_state prev_state; 2520 2521 mutex_enter(&vdp->xdf_cb_lk); 2522 prev_state = vdp->xdf_mstate; 2523 2524 if (vdp->xdf_mstate == mstate) { 2525 while (vdp->xdf_mstate == prev_state) { 2526 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2527 &vdp->xdf_cb_lk) == 0) { 2528 mutex_exit(&vdp->xdf_cb_lk); 2529 return (EINTR); 2530 } 2531 } 2532 } 2533 2534 if ((prev_state != DKIO_INSERTED) && 2535 (vdp->xdf_mstate == DKIO_INSERTED)) { 2536 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2537 mutex_exit(&vdp->xdf_cb_lk); 2538 return (0); 2539 } 2540 2541 mutex_exit(&vdp->xdf_cb_lk); 2542 return (0); 2543 } 2544 2545 /*ARGSUSED*/ 2546 static int 2547 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2548 int *rvalp) 2549 { 2550 minor_t minor = getminor(dev); 2551 int part = XDF_PART(minor); 2552 xdf_t *vdp; 2553 int rv; 2554 2555 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2556 (!xdf_isopen(vdp, part))) 2557 return (ENXIO); 2558 2559 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2560 vdp->xdf_addr, cmd, cmd)); 2561 2562 switch (cmd) { 2563 default: 2564 return (ENOTTY); 2565 case DKIOCG_PHYGEOM: 2566 case DKIOCG_VIRTGEOM: 2567 case DKIOCGGEOM: 2568 case DKIOCSGEOM: 2569 case DKIOCGAPART: 2570 case DKIOCSAPART: 2571 case DKIOCGVTOC: 2572 case DKIOCSVTOC: 2573 case DKIOCPARTINFO: 2574 case DKIOCGEXTVTOC: 2575 case DKIOCSEXTVTOC: 2576 case DKIOCEXTPARTINFO: 2577 case DKIOCGMBOOT: 2578 case DKIOCSMBOOT: 2579 case DKIOCGETEFI: 2580 case DKIOCSETEFI: 2581 case DKIOCSETEXTPART: 2582 case DKIOCPARTITION: 2583 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2584 rvalp, NULL)); 2585 case FDEJECT: 2586 case DKIOCEJECT: 2587 case CDROMEJECT: 2588 return (xdf_ioctl_eject(vdp)); 2589 case DKIOCLOCK: 2590 return (xdf_ioctl_mlock(vdp)); 2591 case DKIOCUNLOCK: 2592 return (xdf_ioctl_munlock(vdp)); 2593 case CDROMREADOFFSET: { 2594 
int offset = 0; 2595 if (!XD_IS_CD(vdp)) 2596 return (ENOTTY); 2597 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2598 return (EFAULT); 2599 return (0); 2600 } 2601 case DKIOCGMEDIAINFO: { 2602 struct dk_minfo media_info; 2603 2604 media_info.dki_lbsize = vdp->xdf_xdev_secsize; 2605 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2606 if (XD_IS_CD(vdp)) 2607 media_info.dki_media_type = DK_CDROM; 2608 else 2609 media_info.dki_media_type = DK_FIXED_DISK; 2610 2611 if (ddi_copyout(&media_info, (void *)arg, 2612 sizeof (struct dk_minfo), mode)) 2613 return (EFAULT); 2614 return (0); 2615 } 2616 case DKIOCINFO: { 2617 struct dk_cinfo info; 2618 2619 /* controller information */ 2620 if (XD_IS_CD(vdp)) 2621 info.dki_ctype = DKC_CDROM; 2622 else 2623 info.dki_ctype = DKC_VBD; 2624 2625 info.dki_cnum = 0; 2626 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2627 2628 /* unit information */ 2629 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2630 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2631 info.dki_flags = DKI_FMTVOL; 2632 info.dki_partition = part; 2633 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2634 info.dki_addr = 0; 2635 info.dki_space = 0; 2636 info.dki_prio = 0; 2637 info.dki_vec = 0; 2638 2639 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2640 return (EFAULT); 2641 return (0); 2642 } 2643 case DKIOCSTATE: { 2644 enum dkio_state mstate; 2645 2646 if (ddi_copyin((void *)arg, &mstate, 2647 sizeof (mstate), mode) != 0) 2648 return (EFAULT); 2649 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2650 return (rv); 2651 mstate = vdp->xdf_mstate; 2652 if (ddi_copyout(&mstate, (void *)arg, 2653 sizeof (mstate), mode) != 0) 2654 return (EFAULT); 2655 return (0); 2656 } 2657 case DKIOCREMOVABLE: { 2658 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2659 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2660 return (EFAULT); 2661 return (0); 2662 } 2663 case DKIOCGETWCE: { 2664 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2665 if (ddi_copyout(&i, 
(void *)arg, sizeof (i), mode)) 2666 return (EFAULT); 2667 return (0); 2668 } 2669 case DKIOCSETWCE: { 2670 int i; 2671 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2672 return (EFAULT); 2673 vdp->xdf_wce = VOID2BOOLEAN(i); 2674 return (0); 2675 } 2676 case DKIOCFLUSHWRITECACHE: { 2677 struct dk_callback *dkc = (struct dk_callback *)arg; 2678 2679 if (vdp->xdf_flush_supported) { 2680 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2681 NULL, 0, 0, (void *)dev); 2682 } else if (vdp->xdf_feature_barrier && 2683 !xdf_barrier_flush_disable) { 2684 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2685 vdp->xdf_cache_flush_block, xdf_flush_block, 2686 vdp->xdf_xdev_secsize, (void *)dev); 2687 } else { 2688 return (ENOTTY); 2689 } 2690 if ((mode & FKIOCTL) && (dkc != NULL) && 2691 (dkc->dkc_callback != NULL)) { 2692 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2693 /* need to return 0 after calling callback */ 2694 rv = 0; 2695 } 2696 return (rv); 2697 } 2698 } 2699 /*NOTREACHED*/ 2700 } 2701 2702 static int 2703 xdf_strategy(struct buf *bp) 2704 { 2705 xdf_t *vdp; 2706 minor_t minor; 2707 diskaddr_t p_blkct, p_blkst; 2708 daddr_t blkno; 2709 ulong_t nblks; 2710 int part; 2711 2712 minor = getminor(bp->b_edev); 2713 part = XDF_PART(minor); 2714 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2715 2716 mutex_enter(&vdp->xdf_dev_lk); 2717 if (!xdf_isopen(vdp, part)) { 2718 mutex_exit(&vdp->xdf_dev_lk); 2719 xdf_io_err(bp, ENXIO, 0); 2720 return (0); 2721 } 2722 2723 /* We don't allow IO from the oe_change callback thread */ 2724 ASSERT(curthread != vdp->xdf_oe_change_thread); 2725 2726 /* Check for writes to a read only device */ 2727 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2728 mutex_exit(&vdp->xdf_dev_lk); 2729 xdf_io_err(bp, EROFS, 0); 2730 return (0); 2731 } 2732 2733 /* Check if this I/O is accessing a partition or the entire disk */ 2734 if ((long)bp->b_private == XB_SLICE_NONE) { 2735 /* This I/O is using an absolute offset */ 2736 p_blkct = vdp->xdf_xdev_nblocks; 2737 p_blkst 
= 0; 2738 } else { 2739 /* This I/O is using a partition relative offset */ 2740 mutex_exit(&vdp->xdf_dev_lk); 2741 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2742 &p_blkst, NULL, NULL, NULL)) { 2743 xdf_io_err(bp, ENXIO, 0); 2744 return (0); 2745 } 2746 mutex_enter(&vdp->xdf_dev_lk); 2747 } 2748 2749 /* 2750 * Adjust the real blkno and bcount according to the underline 2751 * physical sector size. 2752 */ 2753 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); 2754 2755 /* check for a starting block beyond the disk or partition limit */ 2756 if (blkno > p_blkct) { 2757 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2758 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); 2759 mutex_exit(&vdp->xdf_dev_lk); 2760 xdf_io_err(bp, EINVAL, 0); 2761 return (0); 2762 } 2763 2764 /* Legacy: don't set error flag at this case */ 2765 if (blkno == p_blkct) { 2766 mutex_exit(&vdp->xdf_dev_lk); 2767 bp->b_resid = bp->b_bcount; 2768 biodone(bp); 2769 return (0); 2770 } 2771 2772 /* sanitize the input buf */ 2773 bioerror(bp, 0); 2774 bp->b_resid = 0; 2775 bp->av_back = bp->av_forw = NULL; 2776 2777 /* Adjust for partial transfer, this will result in an error later */ 2778 if (vdp->xdf_xdev_secsize != 0 && 2779 vdp->xdf_xdev_secsize != XB_BSIZE) { 2780 nblks = bp->b_bcount / vdp->xdf_xdev_secsize; 2781 } else { 2782 nblks = bp->b_bcount >> XB_BSHIFT; 2783 } 2784 2785 if ((blkno + nblks) > p_blkct) { 2786 if (vdp->xdf_xdev_secsize != 0 && 2787 vdp->xdf_xdev_secsize != XB_BSIZE) { 2788 bp->b_resid = 2789 ((blkno + nblks) - p_blkct) * 2790 vdp->xdf_xdev_secsize; 2791 } else { 2792 bp->b_resid = 2793 ((blkno + nblks) - p_blkct) << 2794 XB_BSHIFT; 2795 } 2796 bp->b_bcount -= bp->b_resid; 2797 } 2798 2799 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2800 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); 2801 2802 /* Fix up the buf struct */ 2803 bp->b_flags |= B_BUSY; 2804 bp->b_private = (void *)(uintptr_t)p_blkst; 2805 2806 
xdf_bp_push(vdp, bp); 2807 mutex_exit(&vdp->xdf_dev_lk); 2808 xdf_io_start(vdp); 2809 if (do_polled_io) 2810 (void) xdf_ring_drain(vdp); 2811 return (0); 2812 } 2813 2814 /*ARGSUSED*/ 2815 static int 2816 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2817 { 2818 xdf_t *vdp; 2819 minor_t minor; 2820 diskaddr_t p_blkcnt; 2821 int part; 2822 2823 minor = getminor(dev); 2824 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2825 return (ENXIO); 2826 2827 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2828 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2829 2830 part = XDF_PART(minor); 2831 if (!xdf_isopen(vdp, part)) 2832 return (ENXIO); 2833 2834 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2835 NULL, NULL, NULL, NULL)) 2836 return (ENXIO); 2837 2838 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2839 return (ENOSPC); 2840 2841 if (U_INVAL(uiop)) 2842 return (EINVAL); 2843 2844 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2845 } 2846 2847 /*ARGSUSED*/ 2848 static int 2849 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2850 { 2851 xdf_t *vdp; 2852 minor_t minor; 2853 diskaddr_t p_blkcnt; 2854 int part; 2855 2856 minor = getminor(dev); 2857 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2858 return (ENXIO); 2859 2860 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2861 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2862 2863 part = XDF_PART(minor); 2864 if (!xdf_isopen(vdp, part)) 2865 return (ENXIO); 2866 2867 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2868 NULL, NULL, NULL, NULL)) 2869 return (ENXIO); 2870 2871 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2872 return (ENOSPC); 2873 2874 if (U_INVAL(uiop)) 2875 return (EINVAL); 2876 2877 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2878 } 2879 2880 /*ARGSUSED*/ 2881 static int 2882 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2883 { 2884 xdf_t *vdp; 2885 minor_t minor; 2886 struct uio *uiop 
= aiop->aio_uio; 2887 diskaddr_t p_blkcnt; 2888 int part; 2889 2890 minor = getminor(dev); 2891 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2892 return (ENXIO); 2893 2894 part = XDF_PART(minor); 2895 if (!xdf_isopen(vdp, part)) 2896 return (ENXIO); 2897 2898 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2899 NULL, NULL, NULL, NULL)) 2900 return (ENXIO); 2901 2902 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2903 return (ENOSPC); 2904 2905 if (U_INVAL(uiop)) 2906 return (EINVAL); 2907 2908 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2909 } 2910 2911 /*ARGSUSED*/ 2912 static int 2913 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2914 { 2915 xdf_t *vdp; 2916 minor_t minor; 2917 struct uio *uiop = aiop->aio_uio; 2918 diskaddr_t p_blkcnt; 2919 int part; 2920 2921 minor = getminor(dev); 2922 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2923 return (ENXIO); 2924 2925 part = XDF_PART(minor); 2926 if (!xdf_isopen(vdp, part)) 2927 return (ENXIO); 2928 2929 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2930 NULL, NULL, NULL, NULL)) 2931 return (ENXIO); 2932 2933 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2934 return (ENOSPC); 2935 2936 if (U_INVAL(uiop)) 2937 return (EINVAL); 2938 2939 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2940 } 2941 2942 static int 2943 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2944 { 2945 struct buf dumpbuf, *dbp = &dumpbuf; 2946 xdf_t *vdp; 2947 minor_t minor; 2948 int err = 0; 2949 int part; 2950 diskaddr_t p_blkcnt, p_blkst; 2951 2952 minor = getminor(dev); 2953 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2954 return (ENXIO); 2955 2956 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2957 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2958 2959 /* We don't allow IO from the oe_change callback thread */ 2960 ASSERT(curthread != vdp->xdf_oe_change_thread); 2961 2962 
part = XDF_PART(minor); 2963 if (!xdf_isopen(vdp, part)) 2964 return (ENXIO); 2965 2966 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 2967 NULL, NULL, NULL)) 2968 return (ENXIO); 2969 2970 if ((blkno + nblk) > 2971 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 2972 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 2973 vdp->xdf_addr, (daddr_t)((blkno + nblk) / 2974 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 2975 return (EINVAL); 2976 } 2977 2978 bioinit(dbp); 2979 dbp->b_flags = B_BUSY; 2980 dbp->b_un.b_addr = addr; 2981 dbp->b_bcount = nblk << DEV_BSHIFT; 2982 dbp->b_blkno = blkno; 2983 dbp->b_edev = dev; 2984 dbp->b_private = (void *)(uintptr_t)p_blkst; 2985 2986 mutex_enter(&vdp->xdf_dev_lk); 2987 xdf_bp_push(vdp, dbp); 2988 mutex_exit(&vdp->xdf_dev_lk); 2989 xdf_io_start(vdp); 2990 err = xdf_ring_drain(vdp); 2991 biofini(dbp); 2992 return (err); 2993 } 2994 2995 /*ARGSUSED*/ 2996 static int 2997 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 2998 { 2999 minor_t minor; 3000 xdf_t *vdp; 3001 int part; 3002 ulong_t parbit; 3003 3004 minor = getminor(dev); 3005 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3006 return (ENXIO); 3007 3008 mutex_enter(&vdp->xdf_dev_lk); 3009 part = XDF_PART(minor); 3010 if (!xdf_isopen(vdp, part)) { 3011 mutex_exit(&vdp->xdf_dev_lk); 3012 return (ENXIO); 3013 } 3014 parbit = 1 << part; 3015 3016 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3017 if (otyp == OTYP_LYR) { 3018 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3019 if (--vdp->xdf_vd_lyropen[part] == 0) 3020 vdp->xdf_vd_open[otyp] &= ~parbit; 3021 } else { 3022 vdp->xdf_vd_open[otyp] &= ~parbit; 3023 } 3024 vdp->xdf_vd_exclopen &= ~parbit; 3025 3026 mutex_exit(&vdp->xdf_dev_lk); 3027 return (0); 3028 } 3029 3030 static int 3031 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3032 { 3033 minor_t minor; 3034 xdf_t *vdp; 3035 int part; 3036 ulong_t parbit; 3037 diskaddr_t p_blkct = 0; 3038 
boolean_t firstopen; 3039 boolean_t nodelay; 3040 3041 minor = getminor(*devp); 3042 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3043 return (ENXIO); 3044 3045 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3046 3047 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3048 3049 /* do cv_wait until connected or failed */ 3050 mutex_enter(&vdp->xdf_cb_lk); 3051 mutex_enter(&vdp->xdf_dev_lk); 3052 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3053 mutex_exit(&vdp->xdf_dev_lk); 3054 mutex_exit(&vdp->xdf_cb_lk); 3055 return (ENXIO); 3056 } 3057 mutex_exit(&vdp->xdf_cb_lk); 3058 3059 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3060 mutex_exit(&vdp->xdf_dev_lk); 3061 return (EROFS); 3062 } 3063 3064 part = XDF_PART(minor); 3065 parbit = 1 << part; 3066 if ((vdp->xdf_vd_exclopen & parbit) || 3067 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3068 mutex_exit(&vdp->xdf_dev_lk); 3069 return (EBUSY); 3070 } 3071 3072 /* are we the first one to open this node? */ 3073 firstopen = !xdf_isopen(vdp, -1); 3074 3075 if (otyp == OTYP_LYR) 3076 vdp->xdf_vd_lyropen[part]++; 3077 3078 vdp->xdf_vd_open[otyp] |= parbit; 3079 3080 if (flag & FEXCL) 3081 vdp->xdf_vd_exclopen |= parbit; 3082 3083 mutex_exit(&vdp->xdf_dev_lk); 3084 3085 /* force a re-validation */ 3086 if (firstopen) 3087 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3088 3089 /* If this is a non-blocking open then we're done */ 3090 if (nodelay) 3091 return (0); 3092 3093 /* 3094 * This is a blocking open, so we require: 3095 * - that the disk have a valid label on it 3096 * - that the size of the partition that we're opening is non-zero 3097 */ 3098 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3099 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3100 (void) xdf_close(*devp, flag, otyp, credp); 3101 return (ENXIO); 3102 } 3103 3104 return (0); 3105 } 3106 3107 /*ARGSUSED*/ 3108 static void 3109 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3110 { 3111 xdf_t *vdp = 
(xdf_t *)ddi_get_driver_private(dip); 3112 cv_broadcast(&vdp->xdf_hp_status_cv); 3113 } 3114 3115 static int 3116 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3117 char *name, caddr_t valuep, int *lengthp) 3118 { 3119 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3120 3121 /* 3122 * Sanity check that if a dev_t or dip were specified that they 3123 * correspond to this device driver. On debug kernels we'll 3124 * panic and on non-debug kernels we'll return failure. 3125 */ 3126 ASSERT(ddi_driver_major(dip) == xdf_major); 3127 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3128 if ((ddi_driver_major(dip) != xdf_major) || 3129 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3130 return (DDI_PROP_NOT_FOUND); 3131 3132 if (vdp == NULL) 3133 return (ddi_prop_op(dev, dip, prop_op, flags, 3134 name, valuep, lengthp)); 3135 3136 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3137 dev, dip, prop_op, flags, name, valuep, lengthp, 3138 XDF_PART(getminor(dev)), NULL)); 3139 } 3140 3141 /*ARGSUSED*/ 3142 static int 3143 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3144 { 3145 int instance = XDF_INST(getminor((dev_t)arg)); 3146 xdf_t *vbdp; 3147 3148 switch (cmd) { 3149 case DDI_INFO_DEVT2DEVINFO: 3150 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3151 *rp = NULL; 3152 return (DDI_FAILURE); 3153 } 3154 *rp = vbdp->xdf_dip; 3155 return (DDI_SUCCESS); 3156 3157 case DDI_INFO_DEVT2INSTANCE: 3158 *rp = (void *)(uintptr_t)instance; 3159 return (DDI_SUCCESS); 3160 3161 default: 3162 return (DDI_FAILURE); 3163 } 3164 } 3165 3166 /*ARGSUSED*/ 3167 static int 3168 xdf_resume(dev_info_t *dip) 3169 { 3170 xdf_t *vdp; 3171 char *oename; 3172 3173 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3174 goto err; 3175 3176 if (xdf_debug & SUSRES_DBG) 3177 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3178 3179 mutex_enter(&vdp->xdf_cb_lk); 3180 3181 if 
(xvdi_resume(dip) != DDI_SUCCESS) { 3182 mutex_exit(&vdp->xdf_cb_lk); 3183 goto err; 3184 } 3185 3186 if (((oename = xvdi_get_oename(dip)) == NULL) || 3187 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3188 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3189 mutex_exit(&vdp->xdf_cb_lk); 3190 goto err; 3191 } 3192 3193 mutex_enter(&vdp->xdf_dev_lk); 3194 ASSERT(vdp->xdf_state != XD_READY); 3195 xdf_set_state(vdp, XD_UNKNOWN); 3196 mutex_exit(&vdp->xdf_dev_lk); 3197 3198 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3199 mutex_exit(&vdp->xdf_cb_lk); 3200 goto err; 3201 } 3202 3203 mutex_exit(&vdp->xdf_cb_lk); 3204 3205 if (xdf_debug & SUSRES_DBG) 3206 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3207 return (DDI_SUCCESS); 3208 err: 3209 if (xdf_debug & SUSRES_DBG) 3210 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3211 return (DDI_FAILURE); 3212 } 3213 3214 static int 3215 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3216 { 3217 int n, instance = ddi_get_instance(dip); 3218 ddi_iblock_cookie_t ibc, softibc; 3219 boolean_t dev_iscd = B_FALSE; 3220 xdf_t *vdp; 3221 char *oename, *xsname, *str; 3222 3223 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3224 "xdf_debug", 0)) != 0) 3225 xdf_debug = n; 3226 3227 switch (cmd) { 3228 case DDI_RESUME: 3229 return (xdf_resume(dip)); 3230 case DDI_ATTACH: 3231 break; 3232 default: 3233 return (DDI_FAILURE); 3234 } 3235 /* DDI_ATTACH */ 3236 3237 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 3238 ((oename = xvdi_get_oename(dip)) == NULL)) 3239 return (DDI_FAILURE); 3240 3241 /* 3242 * Disable auto-detach. This is necessary so that we don't get 3243 * detached while we're disconnected from the back end. 
3244 */ 3245 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3246 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3247 return (DDI_FAILURE); 3248 3249 /* driver handles kernel-issued IOCTLs */ 3250 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3251 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3252 return (DDI_FAILURE); 3253 3254 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3255 return (DDI_FAILURE); 3256 3257 if (ddi_get_soft_iblock_cookie(dip, 3258 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3259 return (DDI_FAILURE); 3260 3261 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3262 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3263 ddi_get_name_addr(dip)); 3264 return (DDI_FAILURE); 3265 } 3266 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3267 dev_iscd = B_TRUE; 3268 strfree(str); 3269 3270 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3271 return (DDI_FAILURE); 3272 3273 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3274 vdp = ddi_get_soft_state(xdf_ssp, instance); 3275 ddi_set_driver_private(dip, vdp); 3276 vdp->xdf_dip = dip; 3277 vdp->xdf_addr = ddi_get_name_addr(dip); 3278 vdp->xdf_suspending = B_FALSE; 3279 vdp->xdf_media_req_supported = B_FALSE; 3280 vdp->xdf_peer = INVALID_DOMID; 3281 vdp->xdf_evtchn = INVALID_EVTCHN; 3282 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3283 offsetof(v_req_t, v_link)); 3284 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3285 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3286 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3287 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3288 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3289 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3290 vdp->xdf_cmbl_reattach = B_TRUE; 3291 if (dev_iscd) { 3292 vdp->xdf_dinfo |= VDISK_CDROM; 3293 vdp->xdf_mstate = DKIO_EJECTED; 3294 } else { 3295 vdp->xdf_mstate = DKIO_NONE; 3296 } 3297 3298 if 
((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3299 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3300 goto errout0; 3301 3302 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3303 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3304 goto errout0; 3305 3306 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3307 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3308 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3309 ddi_get_name_addr(dip)); 3310 goto errout0; 3311 } 3312 3313 /* 3314 * Initialize the physical geometry stucture. Note that currently 3315 * we don't know the size of the backend device so the number 3316 * of blocks on the device will be initialized to zero. Once 3317 * we connect to the backend device we'll update the physical 3318 * geometry to reflect the real size of the device. 3319 */ 3320 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3321 vdp->xdf_pgeom_fixed = B_FALSE; 3322 3323 /* 3324 * create default device minor nodes: non-removable disk 3325 * we will adjust minor nodes after we are connected w/ backend 3326 */ 3327 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3328 if (xdf_cmlb_attach(vdp) != 0) { 3329 cmn_err(CE_WARN, 3330 "xdf@%s: attach failed, cmlb attach failed", 3331 ddi_get_name_addr(dip)); 3332 goto errout0; 3333 } 3334 3335 /* 3336 * We ship with cache-enabled disks 3337 */ 3338 vdp->xdf_wce = B_TRUE; 3339 3340 mutex_enter(&vdp->xdf_cb_lk); 3341 /* Watch backend XenbusState change */ 3342 if (xvdi_add_event_handler(dip, 3343 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3344 mutex_exit(&vdp->xdf_cb_lk); 3345 goto errout0; 3346 } 3347 3348 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3349 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3350 ddi_get_name_addr(dip)); 3351 mutex_exit(&vdp->xdf_cb_lk); 3352 goto errout1; 3353 } 3354 mutex_exit(&vdp->xdf_cb_lk); 3355 3356 #if defined(XPV_HVM_DRIVER) 3357 3358 xdf_hvm_add(dip); 3359 3360 /* Report our version to dom0. 
*/ 3361 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3362 HVMPV_XDF_VERS)) 3363 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3364 3365 #else /* !XPV_HVM_DRIVER */ 3366 3367 /* create kstat for iostat(1M) */ 3368 if (xdf_kstat_create(dip, "xdf", instance) != 0) { 3369 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3370 ddi_get_name_addr(dip)); 3371 goto errout1; 3372 } 3373 3374 #endif /* !XPV_HVM_DRIVER */ 3375 3376 ddi_report_dev(dip); 3377 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3378 return (DDI_SUCCESS); 3379 3380 errout1: 3381 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3382 xvdi_remove_event_handler(dip, XS_OE_STATE); 3383 errout0: 3384 if (vdp->xdf_vd_lbl != NULL) { 3385 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3386 cmlb_free_handle(&vdp->xdf_vd_lbl); 3387 vdp->xdf_vd_lbl = NULL; 3388 } 3389 if (vdp->xdf_softintr_id != NULL) 3390 ddi_remove_softintr(vdp->xdf_softintr_id); 3391 xvdi_remove_xb_watch_handlers(dip); 3392 if (vdp->xdf_ready_tq != NULL) 3393 ddi_taskq_destroy(vdp->xdf_ready_tq); 3394 mutex_destroy(&vdp->xdf_cb_lk); 3395 mutex_destroy(&vdp->xdf_dev_lk); 3396 cv_destroy(&vdp->xdf_dev_cv); 3397 cv_destroy(&vdp->xdf_hp_status_cv); 3398 ddi_soft_state_free(xdf_ssp, instance); 3399 ddi_set_driver_private(dip, NULL); 3400 ddi_prop_remove_all(dip); 3401 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3402 return (DDI_FAILURE); 3403 } 3404 3405 static int 3406 xdf_suspend(dev_info_t *dip) 3407 { 3408 int instance = ddi_get_instance(dip); 3409 xdf_t *vdp; 3410 3411 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3412 return (DDI_FAILURE); 3413 3414 if (xdf_debug & SUSRES_DBG) 3415 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3416 3417 xvdi_suspend(dip); 3418 3419 mutex_enter(&vdp->xdf_cb_lk); 3420 mutex_enter(&vdp->xdf_dev_lk); 3421 3422 vdp->xdf_suspending = B_TRUE; 3423 xdf_ring_destroy(vdp); 3424 xdf_set_state(vdp, XD_SUSPEND); 3425 vdp->xdf_suspending = 
B_FALSE; 3426 3427 mutex_exit(&vdp->xdf_dev_lk); 3428 mutex_exit(&vdp->xdf_cb_lk); 3429 3430 if (xdf_debug & SUSRES_DBG) 3431 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3432 3433 return (DDI_SUCCESS); 3434 } 3435 3436 static int 3437 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3438 { 3439 xdf_t *vdp; 3440 int instance; 3441 3442 switch (cmd) { 3443 3444 case DDI_PM_SUSPEND: 3445 break; 3446 3447 case DDI_SUSPEND: 3448 return (xdf_suspend(dip)); 3449 3450 case DDI_DETACH: 3451 break; 3452 3453 default: 3454 return (DDI_FAILURE); 3455 } 3456 3457 instance = ddi_get_instance(dip); 3458 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3459 vdp = ddi_get_soft_state(xdf_ssp, instance); 3460 3461 if (vdp == NULL) 3462 return (DDI_FAILURE); 3463 3464 mutex_enter(&vdp->xdf_cb_lk); 3465 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3466 if (vdp->xdf_state != XD_CLOSED) { 3467 mutex_exit(&vdp->xdf_cb_lk); 3468 return (DDI_FAILURE); 3469 } 3470 mutex_exit(&vdp->xdf_cb_lk); 3471 3472 ASSERT(!ISDMACBON(vdp)); 3473 3474 #if defined(XPV_HVM_DRIVER) 3475 xdf_hvm_rm(dip); 3476 #endif /* XPV_HVM_DRIVER */ 3477 3478 if (vdp->xdf_timeout_id != 0) 3479 (void) untimeout(vdp->xdf_timeout_id); 3480 3481 xvdi_remove_event_handler(dip, XS_OE_STATE); 3482 ddi_taskq_destroy(vdp->xdf_ready_tq); 3483 3484 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3485 cmlb_free_handle(&vdp->xdf_vd_lbl); 3486 3487 /* we'll support backend running in domU later */ 3488 #ifdef DOMU_BACKEND 3489 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3490 #endif 3491 3492 list_destroy(&vdp->xdf_vreq_act); 3493 ddi_prop_remove_all(dip); 3494 xdf_kstat_delete(dip); 3495 ddi_remove_softintr(vdp->xdf_softintr_id); 3496 xvdi_remove_xb_watch_handlers(dip); 3497 ddi_set_driver_private(dip, NULL); 3498 cv_destroy(&vdp->xdf_dev_cv); 3499 mutex_destroy(&vdp->xdf_cb_lk); 3500 mutex_destroy(&vdp->xdf_dev_lk); 3501 if (vdp->xdf_cache_flush_block != NULL) 3502 kmem_free(vdp->xdf_flush_mem, 2 * 
vdp->xdf_xdev_secsize); 3503 ddi_soft_state_free(xdf_ssp, instance); 3504 return (DDI_SUCCESS); 3505 } 3506 3507 /* 3508 * Driver linkage structures. 3509 */ 3510 static struct cb_ops xdf_cbops = { 3511 xdf_open, 3512 xdf_close, 3513 xdf_strategy, 3514 nodev, 3515 xdf_dump, 3516 xdf_read, 3517 xdf_write, 3518 xdf_ioctl, 3519 nodev, 3520 nodev, 3521 nodev, 3522 nochpoll, 3523 xdf_prop_op, 3524 NULL, 3525 D_MP | D_NEW | D_64BIT, 3526 CB_REV, 3527 xdf_aread, 3528 xdf_awrite 3529 }; 3530 3531 struct dev_ops xdf_devops = { 3532 DEVO_REV, /* devo_rev */ 3533 0, /* devo_refcnt */ 3534 xdf_getinfo, /* devo_getinfo */ 3535 nulldev, /* devo_identify */ 3536 nulldev, /* devo_probe */ 3537 xdf_attach, /* devo_attach */ 3538 xdf_detach, /* devo_detach */ 3539 nodev, /* devo_reset */ 3540 &xdf_cbops, /* devo_cb_ops */ 3541 NULL, /* devo_bus_ops */ 3542 NULL, /* devo_power */ 3543 ddi_quiesce_not_supported, /* devo_quiesce */ 3544 }; 3545 3546 /* 3547 * Module linkage structures. 3548 */ 3549 static struct modldrv modldrv = { 3550 &mod_driverops, /* Type of module. 
This one is a driver */ 3551 "virtual block driver", /* short description */ 3552 &xdf_devops /* driver specific ops */ 3553 }; 3554 3555 static struct modlinkage xdf_modlinkage = { 3556 MODREV_1, (void *)&modldrv, NULL 3557 }; 3558 3559 /* 3560 * standard module entry points 3561 */ 3562 int 3563 _init(void) 3564 { 3565 int rc; 3566 3567 xdf_major = ddi_name_to_major("xdf"); 3568 if (xdf_major == (major_t)-1) 3569 return (EINVAL); 3570 3571 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3572 return (rc); 3573 3574 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3575 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3576 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3577 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3578 3579 #if defined(XPV_HVM_DRIVER) 3580 xdf_hvm_init(); 3581 #endif /* XPV_HVM_DRIVER */ 3582 3583 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3584 #if defined(XPV_HVM_DRIVER) 3585 xdf_hvm_fini(); 3586 #endif /* XPV_HVM_DRIVER */ 3587 kmem_cache_destroy(xdf_vreq_cache); 3588 kmem_cache_destroy(xdf_gs_cache); 3589 ddi_soft_state_fini(&xdf_ssp); 3590 return (rc); 3591 } 3592 3593 return (rc); 3594 } 3595 3596 int 3597 _fini(void) 3598 { 3599 int err; 3600 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3601 return (err); 3602 3603 #if defined(XPV_HVM_DRIVER) 3604 xdf_hvm_fini(); 3605 #endif /* XPV_HVM_DRIVER */ 3606 3607 kmem_cache_destroy(xdf_vreq_cache); 3608 kmem_cache_destroy(xdf_gs_cache); 3609 ddi_soft_state_fini(&xdf_ssp); 3610 3611 return (0); 3612 } 3613 3614 int 3615 _info(struct modinfo *modinfop) 3616 { 3617 return (mod_info(&xdf_modlinkage, modinfop)); 3618 } 3619