/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports Solaris disk device nodes, accepts IO requests from
 * those nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).
 * Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notification of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
68 */ 69 70 #include <io/xdf.h> 71 72 #include <sys/conf.h> 73 #include <sys/dkio.h> 74 #include <sys/promif.h> 75 #include <sys/sysmacros.h> 76 #include <sys/kstat.h> 77 #include <sys/mach_mmu.h> 78 #ifdef XPV_HVM_DRIVER 79 #include <sys/xpv_support.h> 80 #include <sys/sunndi.h> 81 #else /* !XPV_HVM_DRIVER */ 82 #include <sys/evtchn_impl.h> 83 #endif /* !XPV_HVM_DRIVER */ 84 #include <public/io/xenbus.h> 85 #include <xen/sys/xenbus_impl.h> 86 #include <sys/scsi/generic/inquiry.h> 87 #include <xen/io/blkif_impl.h> 88 #include <sys/fdio.h> 89 #include <sys/cdio.h> 90 91 /* 92 * DEBUG_EVAL can be used to include debug only statements without 93 * having to use '#ifdef DEBUG' statements 94 */ 95 #ifdef DEBUG 96 #define DEBUG_EVAL(x) (x) 97 #else /* !DEBUG */ 98 #define DEBUG_EVAL(x) 99 #endif /* !DEBUG */ 100 101 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ 102 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ 103 104 #define INVALID_DOMID ((domid_t)-1) 105 #define FLUSH_DISKCACHE 0x1 106 #define WRITE_BARRIER 0x2 107 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 108 #define USE_WRITE_BARRIER(vdp) \ 109 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 110 #define USE_FLUSH_DISKCACHE(vdp) \ 111 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 112 #define IS_WRITE_BARRIER(vdp, bp) \ 113 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 114 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 115 #define IS_FLUSH_DISKCACHE(bp) \ 116 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 117 118 #define VREQ_DONE(vreq) \ 119 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ 120 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ 121 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) 122 123 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) 124 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) 125 126 extern int do_polled_io; 127 128 /* run-time tunables that we don't want the compiler to 
optimize away */ 129 volatile int xdf_debug = 0; 130 volatile boolean_t xdf_barrier_flush_disable = B_FALSE; 131 132 /* per module globals */ 133 major_t xdf_major; 134 static void *xdf_ssp; 135 static kmem_cache_t *xdf_vreq_cache; 136 static kmem_cache_t *xdf_gs_cache; 137 static int xdf_maxphys = XB_MAXPHYS; 138 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 139 static int xdf_fbrewrites; /* flush block re-write count */ 140 141 /* misc public functions (used by xdf_shell.c) */ 142 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 143 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 144 145 /* misc private functions */ 146 static void xdf_io_start(xdf_t *); 147 148 /* callbacks from commmon label */ 149 static cmlb_tg_ops_t xdf_lb_ops = { 150 TG_DK_OPS_VERSION_1, 151 xdf_lb_rdwr, 152 xdf_lb_getinfo 153 }; 154 155 /* 156 * I/O buffer DMA attributes 157 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 158 */ 159 static ddi_dma_attr_t xb_dma_attr = { 160 DMA_ATTR_V0, 161 (uint64_t)0, /* lowest address */ 162 (uint64_t)0xffffffffffffffff, /* highest usable address */ 163 (uint64_t)0xffffff, /* DMA counter limit max */ 164 (uint64_t)XB_BSIZE, /* alignment in bytes */ 165 XB_BSIZE - 1, /* bitmap of burst sizes */ 166 XB_BSIZE, /* min transfer */ 167 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 168 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 169 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 170 XB_BSIZE, /* granularity */ 171 0, /* flags (reserved) */ 172 }; 173 174 static ddi_device_acc_attr_t xc_acc_attr = { 175 DDI_DEVICE_ATTR_V0, 176 DDI_NEVERSWAP_ACC, 177 DDI_STRICTORDER_ACC 178 }; 179 180 static void 181 xdf_timeout_handler(void *arg) 182 { 183 xdf_t *vdp = arg; 184 185 mutex_enter(&vdp->xdf_dev_lk); 186 vdp->xdf_timeout_id = 0; 187 mutex_exit(&vdp->xdf_dev_lk); 188 189 /* new timeout thread could be re-scheduled */ 190 xdf_io_start(vdp); 191 } 192 193 /* 194 * callback 
func when DMA/GTE resources is available 195 * 196 * Note: we only register one callback function to grant table subsystem 197 * since we only have one 'struct gnttab_free_callback' in xdf_t. 198 */ 199 static int 200 xdf_dmacallback(caddr_t arg) 201 { 202 xdf_t *vdp = (xdf_t *)arg; 203 ASSERT(vdp != NULL); 204 205 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 206 vdp->xdf_addr)); 207 208 ddi_trigger_softintr(vdp->xdf_softintr_id); 209 return (DDI_DMA_CALLBACK_DONE); 210 } 211 212 static ge_slot_t * 213 gs_get(xdf_t *vdp, int isread) 214 { 215 grant_ref_t gh; 216 ge_slot_t *gs; 217 218 /* try to alloc GTEs needed in this slot, first */ 219 if (gnttab_alloc_grant_references( 220 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 221 if (vdp->xdf_gnt_callback.next == NULL) { 222 SETDMACBON(vdp); 223 gnttab_request_free_callback( 224 &vdp->xdf_gnt_callback, 225 (void (*)(void *))xdf_dmacallback, 226 (void *)vdp, 227 BLKIF_MAX_SEGMENTS_PER_REQUEST); 228 } 229 return (NULL); 230 } 231 232 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 233 if (gs == NULL) { 234 gnttab_free_grant_references(gh); 235 if (vdp->xdf_timeout_id == 0) 236 /* restart I/O after one second */ 237 vdp->xdf_timeout_id = 238 timeout(xdf_timeout_handler, vdp, hz); 239 return (NULL); 240 } 241 242 /* init gs_slot */ 243 gs->gs_oeid = vdp->xdf_peer; 244 gs->gs_isread = isread; 245 gs->gs_ghead = gh; 246 gs->gs_ngrefs = 0; 247 248 return (gs); 249 } 250 251 static void 252 gs_free(ge_slot_t *gs) 253 { 254 int i; 255 256 /* release all grant table entry resources used in this slot */ 257 for (i = 0; i < gs->gs_ngrefs; i++) 258 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 259 gnttab_free_grant_references(gs->gs_ghead); 260 list_remove(&gs->gs_vreq->v_gs, gs); 261 kmem_cache_free(xdf_gs_cache, gs); 262 } 263 264 static grant_ref_t 265 gs_grant(ge_slot_t *gs, mfn_t mfn) 266 { 267 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); 268 269 ASSERT(gr != -1); 270 ASSERT(gs->gs_ngrefs 
< BLKIF_MAX_SEGMENTS_PER_REQUEST); 271 gs->gs_ge[gs->gs_ngrefs++] = gr; 272 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 273 274 return (gr); 275 } 276 277 /* 278 * Alloc a vreq for this bp 279 * bp->av_back contains the pointer to the vreq upon return 280 */ 281 static v_req_t * 282 vreq_get(xdf_t *vdp, buf_t *bp) 283 { 284 v_req_t *vreq = NULL; 285 286 ASSERT(BP_VREQ(bp) == NULL); 287 288 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 289 if (vreq == NULL) { 290 if (vdp->xdf_timeout_id == 0) 291 /* restart I/O after one second */ 292 vdp->xdf_timeout_id = 293 timeout(xdf_timeout_handler, vdp, hz); 294 return (NULL); 295 } 296 bzero(vreq, sizeof (v_req_t)); 297 list_create(&vreq->v_gs, sizeof (ge_slot_t), 298 offsetof(ge_slot_t, gs_vreq_link)); 299 vreq->v_buf = bp; 300 vreq->v_status = VREQ_INIT; 301 vreq->v_runq = B_FALSE; 302 BP_VREQ_SET(bp, vreq); 303 /* init of other fields in vreq is up to the caller */ 304 305 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 306 307 return (vreq); 308 } 309 310 static void 311 vreq_free(xdf_t *vdp, v_req_t *vreq) 312 { 313 buf_t *bp = vreq->v_buf; 314 315 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 316 ASSERT(BP_VREQ(bp) == vreq); 317 318 list_remove(&vdp->xdf_vreq_act, vreq); 319 320 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 321 goto done; 322 323 switch (vreq->v_status) { 324 case VREQ_DMAWIN_DONE: 325 case VREQ_GS_ALLOCED: 326 case VREQ_DMABUF_BOUND: 327 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 328 /*FALLTHRU*/ 329 case VREQ_DMAMEM_ALLOCED: 330 if (!ALIGNED_XFER(bp)) { 331 ASSERT(vreq->v_abuf != NULL); 332 if (!IS_ERROR(bp) && IS_READ(bp)) 333 bcopy(vreq->v_abuf, bp->b_un.b_addr, 334 bp->b_bcount); 335 ddi_dma_mem_free(&vreq->v_align); 336 } 337 /*FALLTHRU*/ 338 case VREQ_MEMDMAHDL_ALLOCED: 339 if (!ALIGNED_XFER(bp)) 340 ddi_dma_free_handle(&vreq->v_memdmahdl); 341 /*FALLTHRU*/ 342 case VREQ_DMAHDL_ALLOCED: 343 ddi_dma_free_handle(&vreq->v_dmahdl); 344 break; 345 default: 346 
break; 347 } 348 done: 349 ASSERT(!vreq->v_runq); 350 list_destroy(&vreq->v_gs); 351 kmem_cache_free(xdf_vreq_cache, vreq); 352 } 353 354 /* 355 * Snarf new data if our flush block was re-written 356 */ 357 static void 358 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 359 { 360 int nblks; 361 boolean_t mapin; 362 363 if (IS_WRITE_BARRIER(vdp, bp)) 364 return; /* write was a flush write */ 365 366 mapin = B_FALSE; 367 nblks = bp->b_bcount >> DEV_BSHIFT; 368 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 369 xdf_fbrewrites++; 370 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 371 mapin = B_TRUE; 372 bp_mapin(bp); 373 } 374 bcopy(bp->b_un.b_addr + 375 ((xdf_flush_block - blkno) << DEV_BSHIFT), 376 vdp->xdf_cache_flush_block, DEV_BSIZE); 377 if (mapin) 378 bp_mapout(bp); 379 } 380 } 381 382 /* 383 * Initalize the DMA and grant table resources for the buf 384 */ 385 static int 386 vreq_setup(xdf_t *vdp, v_req_t *vreq) 387 { 388 int rc; 389 ddi_dma_attr_t dmaattr; 390 uint_t ndcs, ndws; 391 ddi_dma_handle_t dh; 392 ddi_dma_handle_t mdh; 393 ddi_dma_cookie_t dc; 394 ddi_acc_handle_t abh; 395 caddr_t aba; 396 ge_slot_t *gs; 397 size_t bufsz; 398 off_t off; 399 size_t sz; 400 buf_t *bp = vreq->v_buf; 401 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 402 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 403 404 switch (vreq->v_status) { 405 case VREQ_INIT: 406 if (IS_FLUSH_DISKCACHE(bp)) { 407 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 408 DPRINTF(DMA_DBG, ("xdf@%s: " 409 "get ge_slotfailed\n", vdp->xdf_addr)); 410 return (DDI_FAILURE); 411 } 412 vreq->v_blkno = 0; 413 vreq->v_nslots = 1; 414 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 415 vreq->v_status = VREQ_GS_ALLOCED; 416 gs->gs_vreq = vreq; 417 list_insert_head(&vreq->v_gs, gs); 418 return (DDI_SUCCESS); 419 } 420 421 if (IS_WRITE_BARRIER(vdp, bp)) 422 vreq->v_flush_diskcache = WRITE_BARRIER; 423 vreq->v_blkno = bp->b_blkno + 424 (diskaddr_t)(uintptr_t)bp->b_private; 425 /* See if we wrote new data to our flush block */ 426 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 427 check_fbwrite(vdp, bp, vreq->v_blkno); 428 vreq->v_status = VREQ_INIT_DONE; 429 /*FALLTHRU*/ 430 431 case VREQ_INIT_DONE: 432 /* 433 * alloc DMA handle 434 */ 435 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 436 xdf_dmacallback, (caddr_t)vdp, &dh); 437 if (rc != DDI_SUCCESS) { 438 SETDMACBON(vdp); 439 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 440 vdp->xdf_addr)); 441 return (DDI_FAILURE); 442 } 443 444 vreq->v_dmahdl = dh; 445 vreq->v_status = VREQ_DMAHDL_ALLOCED; 446 /*FALLTHRU*/ 447 448 case VREQ_DMAHDL_ALLOCED: 449 /* 450 * alloc dma handle for 512-byte aligned buf 451 */ 452 if (!ALIGNED_XFER(bp)) { 453 /* 454 * XXPV: we need to temporarily enlarge the seg 455 * boundary and s/g length to work round CR6381968 456 */ 457 dmaattr = xb_dma_attr; 458 dmaattr.dma_attr_seg = (uint64_t)-1; 459 dmaattr.dma_attr_sgllen = INT_MAX; 460 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 461 xdf_dmacallback, (caddr_t)vdp, &mdh); 462 if (rc != DDI_SUCCESS) { 463 SETDMACBON(vdp); 464 DPRINTF(DMA_DBG, ("xdf@%s: " 465 "unaligned buf DMAhandle alloc failed\n", 466 vdp->xdf_addr)); 467 return (DDI_FAILURE); 468 } 469 vreq->v_memdmahdl = mdh; 470 vreq->v_status 
= VREQ_MEMDMAHDL_ALLOCED; 471 } 472 /*FALLTHRU*/ 473 474 case VREQ_MEMDMAHDL_ALLOCED: 475 /* 476 * alloc 512-byte aligned buf 477 */ 478 if (!ALIGNED_XFER(bp)) { 479 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 480 bp_mapin(bp); 481 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 482 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 483 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 484 &aba, &bufsz, &abh); 485 if (rc != DDI_SUCCESS) { 486 SETDMACBON(vdp); 487 DPRINTF(DMA_DBG, ("xdf@%s: " 488 "DMA mem allocation failed\n", 489 vdp->xdf_addr)); 490 return (DDI_FAILURE); 491 } 492 493 vreq->v_abuf = aba; 494 vreq->v_align = abh; 495 vreq->v_status = VREQ_DMAMEM_ALLOCED; 496 497 ASSERT(bufsz >= bp->b_bcount); 498 if (!IS_READ(bp)) 499 bcopy(bp->b_un.b_addr, vreq->v_abuf, 500 bp->b_bcount); 501 } 502 /*FALLTHRU*/ 503 504 case VREQ_DMAMEM_ALLOCED: 505 /* 506 * dma bind 507 */ 508 if (ALIGNED_XFER(bp)) { 509 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 510 dma_flags, xdf_dmacallback, (caddr_t)vdp, 511 &dc, &ndcs); 512 } else { 513 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 514 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 515 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 516 } 517 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 518 /* get num of dma windows */ 519 if (rc == DDI_DMA_PARTIAL_MAP) { 520 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 521 ASSERT(rc == DDI_SUCCESS); 522 } else { 523 ndws = 1; 524 } 525 } else { 526 SETDMACBON(vdp); 527 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 528 vdp->xdf_addr)); 529 return (DDI_FAILURE); 530 } 531 532 vreq->v_dmac = dc; 533 vreq->v_dmaw = 0; 534 vreq->v_ndmacs = ndcs; 535 vreq->v_ndmaws = ndws; 536 vreq->v_nslots = ndws; 537 vreq->v_status = VREQ_DMABUF_BOUND; 538 /*FALLTHRU*/ 539 540 case VREQ_DMABUF_BOUND: 541 /* 542 * get ge_slot, callback is set upon failure from gs_get(), 543 * if not set previously 544 */ 545 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 546 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 547 
vdp->xdf_addr)); 548 return (DDI_FAILURE); 549 } 550 551 vreq->v_status = VREQ_GS_ALLOCED; 552 gs->gs_vreq = vreq; 553 list_insert_head(&vreq->v_gs, gs); 554 break; 555 556 case VREQ_GS_ALLOCED: 557 /* nothing need to be done */ 558 break; 559 560 case VREQ_DMAWIN_DONE: 561 /* 562 * move to the next dma window 563 */ 564 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 565 566 /* get a ge_slot for this DMA window */ 567 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 568 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 569 vdp->xdf_addr)); 570 return (DDI_FAILURE); 571 } 572 573 vreq->v_dmaw++; 574 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 575 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 576 vreq->v_status = VREQ_GS_ALLOCED; 577 gs->gs_vreq = vreq; 578 list_insert_head(&vreq->v_gs, gs); 579 break; 580 581 default: 582 return (DDI_FAILURE); 583 } 584 585 return (DDI_SUCCESS); 586 } 587 588 static int 589 xdf_cmlb_attach(xdf_t *vdp) 590 { 591 dev_info_t *dip = vdp->xdf_dip; 592 593 return (cmlb_attach(dip, &xdf_lb_ops, 594 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 595 XD_IS_RM(vdp), 596 B_TRUE, 597 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 598 #if defined(XPV_HVM_DRIVER) 599 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) | 600 CMLB_INTERNAL_MINOR_NODES, 601 #else /* !XPV_HVM_DRIVER */ 602 XD_IS_CD(vdp) ? 
0 : CMLB_FAKE_LABEL_ONE_PARTITION, 603 #endif /* !XPV_HVM_DRIVER */ 604 vdp->xdf_vd_lbl, NULL)); 605 } 606 607 static void 608 xdf_io_err(buf_t *bp, int err, size_t resid) 609 { 610 bioerror(bp, err); 611 if (resid == 0) 612 bp->b_resid = bp->b_bcount; 613 biodone(bp); 614 } 615 616 static void 617 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 618 { 619 v_req_t *vreq = BP_VREQ(bp); 620 621 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 622 623 if (vdp->xdf_xdev_iostat == NULL) 624 return; 625 if ((vreq != NULL) && vreq->v_runq) { 626 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 627 } else { 628 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 629 } 630 } 631 632 static void 633 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 634 { 635 v_req_t *vreq = BP_VREQ(bp); 636 637 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 638 639 if (vdp->xdf_xdev_iostat == NULL) 640 return; 641 if ((vreq != NULL) && vreq->v_runq) { 642 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 643 } else { 644 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 645 } 646 } 647 648 static void 649 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 650 { 651 v_req_t *vreq = BP_VREQ(bp); 652 653 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 654 ASSERT(!vreq->v_runq); 655 656 vreq->v_runq = B_TRUE; 657 if (vdp->xdf_xdev_iostat == NULL) 658 return; 659 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 660 } 661 662 static void 663 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 664 { 665 v_req_t *vreq = BP_VREQ(bp); 666 667 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 668 ASSERT(vreq->v_runq); 669 670 vreq->v_runq = B_FALSE; 671 if (vdp->xdf_xdev_iostat == NULL) 672 return; 673 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 674 } 675 676 int 677 xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance) 678 { 679 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 680 kstat_t *kstat; 681 buf_t *bp; 682 683 if ((kstat = kstat_create( 684 ks_module, instance, NULL, "disk", 685 KSTAT_TYPE_IO, 1, 
KSTAT_FLAG_PERSISTENT)) == NULL) 686 return (-1); 687 688 /* See comment about locking in xdf_kstat_delete(). */ 689 mutex_enter(&vdp->xdf_iostat_lk); 690 mutex_enter(&vdp->xdf_dev_lk); 691 692 /* only one kstat can exist at a time */ 693 if (vdp->xdf_xdev_iostat != NULL) { 694 mutex_exit(&vdp->xdf_dev_lk); 695 mutex_exit(&vdp->xdf_iostat_lk); 696 kstat_delete(kstat); 697 return (-1); 698 } 699 700 vdp->xdf_xdev_iostat = kstat; 701 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 702 kstat_install(vdp->xdf_xdev_iostat); 703 704 /* 705 * Now that we've created a kstat, we need to update the waitq and 706 * runq counts for the kstat to reflect our current state. 707 * 708 * For a buf_t structure to be on the runq, it must have a ring 709 * buffer slot associated with it. To get a ring buffer slot the 710 * buf must first have a v_req_t and a ge_slot_t associated with it. 711 * Then when it is granted a ring buffer slot, v_runq will be set to 712 * true. 713 * 714 * For a buf_t structure to be on the waitq, it must not be on the 715 * runq. So to find all the buf_t's that should be on waitq, we 716 * walk the active buf list and add any buf_t's which aren't on the 717 * runq to the waitq. 718 */ 719 bp = vdp->xdf_f_act; 720 while (bp != NULL) { 721 xdf_kstat_enter(vdp, bp); 722 bp = bp->av_forw; 723 } 724 if (vdp->xdf_ready_tq_bp != NULL) 725 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 726 727 mutex_exit(&vdp->xdf_dev_lk); 728 mutex_exit(&vdp->xdf_iostat_lk); 729 return (0); 730 } 731 732 void 733 xdf_kstat_delete(dev_info_t *dip) 734 { 735 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 736 kstat_t *kstat; 737 buf_t *bp; 738 739 /* 740 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 741 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 742 * and the contents of the our kstat. xdf_iostat_lk is used 743 * to protect the allocation and freeing of the actual kstat. 
744 * xdf_dev_lk can't be used for this purpose because kstat 745 * readers use it to access the contents of the kstat and 746 * hence it can't be held when calling kstat_delete(). 747 */ 748 mutex_enter(&vdp->xdf_iostat_lk); 749 mutex_enter(&vdp->xdf_dev_lk); 750 751 if (vdp->xdf_xdev_iostat == NULL) { 752 mutex_exit(&vdp->xdf_dev_lk); 753 mutex_exit(&vdp->xdf_iostat_lk); 754 return; 755 } 756 757 /* 758 * We're about to destroy the kstat structures, so it isn't really 759 * necessary to update the runq and waitq counts. But, since this 760 * isn't a hot code path we can afford to be a little pedantic and 761 * go ahead and decrement the runq and waitq kstat counters to zero 762 * before free'ing them. This helps us ensure that we've gotten all 763 * our accounting correct. 764 * 765 * For an explanation of how we determine which buffers go on the 766 * runq vs which go on the waitq, see the comments in 767 * xdf_kstat_create(). 768 */ 769 bp = vdp->xdf_f_act; 770 while (bp != NULL) { 771 xdf_kstat_exit(vdp, bp); 772 bp = bp->av_forw; 773 } 774 if (vdp->xdf_ready_tq_bp != NULL) 775 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp); 776 777 kstat = vdp->xdf_xdev_iostat; 778 vdp->xdf_xdev_iostat = NULL; 779 mutex_exit(&vdp->xdf_dev_lk); 780 kstat_delete(kstat); 781 mutex_exit(&vdp->xdf_iostat_lk); 782 } 783 784 /* 785 * Add an IO requests onto the active queue. 786 * 787 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs 788 * are used to establish a connection to the backend, so they recieve 789 * priority over all other IOs. Since xdf_ready_tq_thread only does 790 * synchronous IO, there can only be one xdf_ready_tq_thread request at any 791 * given time and we record the buf associated with that request in 792 * xdf_ready_tq_bp. 
793 */ 794 static void 795 xdf_bp_push(xdf_t *vdp, buf_t *bp) 796 { 797 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 798 ASSERT(bp->av_forw == NULL); 799 800 xdf_kstat_enter(vdp, bp); 801 802 if (curthread == vdp->xdf_ready_tq_thread) { 803 /* new IO requests from the ready thread */ 804 ASSERT(vdp->xdf_ready_tq_bp == NULL); 805 vdp->xdf_ready_tq_bp = bp; 806 return; 807 } 808 809 /* this is normal IO request */ 810 ASSERT(bp != vdp->xdf_ready_tq_bp); 811 812 if (vdp->xdf_f_act == NULL) { 813 /* this is only only IO on the active queue */ 814 ASSERT(vdp->xdf_l_act == NULL); 815 ASSERT(vdp->xdf_i_act == NULL); 816 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; 817 return; 818 } 819 820 /* add this IO to the tail of the active queue */ 821 vdp->xdf_l_act->av_forw = bp; 822 vdp->xdf_l_act = bp; 823 if (vdp->xdf_i_act == NULL) 824 vdp->xdf_i_act = bp; 825 } 826 827 static void 828 xdf_bp_pop(xdf_t *vdp, buf_t *bp) 829 { 830 buf_t *bp_iter; 831 832 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 833 ASSERT(VREQ_DONE(BP_VREQ(bp))); 834 835 if (vdp->xdf_ready_tq_bp == bp) { 836 /* we're done with a ready thread IO request */ 837 ASSERT(bp->av_forw == NULL); 838 vdp->xdf_ready_tq_bp = NULL; 839 return; 840 } 841 842 /* we're done with a normal IO request */ 843 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); 844 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act)); 845 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); 846 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); 847 848 if (bp == vdp->xdf_f_act) { 849 /* This IO was at the head of our active queue. */ 850 vdp->xdf_f_act = bp->av_forw; 851 if (bp == vdp->xdf_l_act) 852 vdp->xdf_l_act = NULL; 853 } else { 854 /* There IO finished before some other pending IOs. 
*/ 855 bp_iter = vdp->xdf_f_act; 856 while (bp != bp_iter->av_forw) { 857 bp_iter = bp_iter->av_forw; 858 ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); 859 ASSERT(bp_iter != vdp->xdf_i_act); 860 } 861 bp_iter->av_forw = bp->av_forw; 862 if (bp == vdp->xdf_l_act) 863 vdp->xdf_l_act = bp_iter; 864 } 865 bp->av_forw = NULL; 866 } 867 868 static buf_t * 869 xdf_bp_next(xdf_t *vdp) 870 { 871 v_req_t *vreq; 872 buf_t *bp; 873 874 if (vdp->xdf_state == XD_CONNECTED) { 875 /* 876 * If we're in the XD_CONNECTED state, we only service IOs 877 * from the xdf_ready_tq_thread thread. 878 */ 879 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 880 return (NULL); 881 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 882 return (bp); 883 return (NULL); 884 } 885 886 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 887 if (vdp->xdf_state != XD_READY) 888 return (NULL); 889 890 ASSERT(vdp->xdf_ready_tq_bp == NULL); 891 for (;;) { 892 if ((bp = vdp->xdf_i_act) == NULL) 893 return (NULL); 894 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 895 return (bp); 896 897 /* advance the active buf index pointer */ 898 vdp->xdf_i_act = bp->av_forw; 899 } 900 } 901 902 static void 903 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 904 { 905 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 906 v_req_t *vreq = gs->gs_vreq; 907 buf_t *bp = vreq->v_buf; 908 909 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 910 ASSERT(BP_VREQ(bp) == vreq); 911 912 gs_free(gs); 913 914 if (bioerr != 0) 915 bioerror(bp, bioerr); 916 ASSERT(vreq->v_nslots > 0); 917 if (--vreq->v_nslots > 0) 918 return; 919 920 /* remove this IO from our active queue */ 921 xdf_bp_pop(vdp, bp); 922 923 ASSERT(vreq->v_runq); 924 xdf_kstat_exit(vdp, bp); 925 vreq->v_runq = B_FALSE; 926 vreq_free(vdp, vreq); 927 928 if (IS_ERROR(bp)) { 929 xdf_io_err(bp, geterror(bp), 0); 930 } else if (bp->b_resid != 0) { 931 /* Partial transfers are an error */ 932 xdf_io_err(bp, EIO, bp->b_resid); 933 } else { 934 biodone(bp); 935 } 936 } 
937 938 /* 939 * xdf interrupt handler 940 */ 941 static uint_t 942 xdf_intr_locked(xdf_t *vdp) 943 { 944 xendev_ring_t *xbr; 945 blkif_response_t *resp; 946 int bioerr; 947 uint64_t id; 948 uint8_t op; 949 uint16_t status; 950 ddi_acc_handle_t acchdl; 951 952 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 953 954 if ((xbr = vdp->xdf_xb_ring) == NULL) 955 return (DDI_INTR_UNCLAIMED); 956 957 acchdl = vdp->xdf_xb_ring_hdl; 958 959 /* 960 * complete all requests which have a response 961 */ 962 while (resp = xvdi_ring_get_response(xbr)) { 963 id = ddi_get64(acchdl, &resp->id); 964 op = ddi_get8(acchdl, &resp->operation); 965 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 966 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 967 op, id, status)); 968 969 if (status != BLKIF_RSP_OKAY) { 970 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 971 vdp->xdf_addr, 972 (op == BLKIF_OP_READ) ? "reading" : "writing")); 973 bioerr = EIO; 974 } else { 975 bioerr = 0; 976 } 977 978 xdf_io_fini(vdp, id, bioerr); 979 } 980 return (DDI_INTR_CLAIMED); 981 } 982 983 /* 984 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 985 * block at a lower pil. 
986 */ 987 static uint_t 988 xdf_intr(caddr_t arg) 989 { 990 xdf_t *vdp = (xdf_t *)arg; 991 int rv; 992 993 mutex_enter(&vdp->xdf_dev_lk); 994 rv = xdf_intr_locked(vdp); 995 mutex_exit(&vdp->xdf_dev_lk); 996 997 if (!do_polled_io) 998 xdf_io_start(vdp); 999 1000 return (rv); 1001 } 1002 1003 static void 1004 xdf_ring_push(xdf_t *vdp) 1005 { 1006 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1007 1008 if (vdp->xdf_xb_ring == NULL) 1009 return; 1010 1011 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1012 DPRINTF(IO_DBG, ( 1013 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1014 vdp->xdf_addr)); 1015 } 1016 1017 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1018 xvdi_notify_oe(vdp->xdf_dip); 1019 } 1020 1021 static int 1022 xdf_ring_drain_locked(xdf_t *vdp) 1023 { 1024 int pollc, rv = 0; 1025 1026 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1027 1028 if (xdf_debug & SUSRES_DBG) 1029 xen_printf("xdf_ring_drain: start\n"); 1030 1031 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1032 if (vdp->xdf_xb_ring == NULL) 1033 goto out; 1034 1035 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1036 (void) xdf_intr_locked(vdp); 1037 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1038 goto out; 1039 xdf_ring_push(vdp); 1040 1041 /* file-backed devices can be slow */ 1042 mutex_exit(&vdp->xdf_dev_lk); 1043 #ifdef XPV_HVM_DRIVER 1044 (void) HYPERVISOR_yield(); 1045 #endif /* XPV_HVM_DRIVER */ 1046 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1047 mutex_enter(&vdp->xdf_dev_lk); 1048 } 1049 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1050 1051 out: 1052 if (vdp->xdf_xb_ring != NULL) { 1053 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1054 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1055 rv = EIO; 1056 } 1057 if (xdf_debug & SUSRES_DBG) 1058 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1059 vdp->xdf_addr, rv); 1060 return (rv); 1061 } 1062 1063 static int 1064 xdf_ring_drain(xdf_t *vdp) 1065 { 1066 int rv; 
1067 mutex_enter(&vdp->xdf_dev_lk); 1068 rv = xdf_ring_drain_locked(vdp); 1069 mutex_exit(&vdp->xdf_dev_lk); 1070 return (rv); 1071 } 1072 1073 /* 1074 * Destroy all v_req_t, grant table entries, and our ring buffer. 1075 */ 1076 static void 1077 xdf_ring_destroy(xdf_t *vdp) 1078 { 1079 v_req_t *vreq; 1080 buf_t *bp; 1081 ge_slot_t *gs; 1082 1083 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1084 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1085 1086 if ((vdp->xdf_state != XD_INIT) && 1087 (vdp->xdf_state != XD_CONNECTED) && 1088 (vdp->xdf_state != XD_READY)) { 1089 ASSERT(vdp->xdf_xb_ring == NULL); 1090 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1091 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1092 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1093 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1094 return; 1095 } 1096 1097 /* 1098 * We don't want to recieve async notifications from the backend 1099 * when it finishes processing ring entries. 1100 */ 1101 #ifdef XPV_HVM_DRIVER 1102 ec_unbind_evtchn(vdp->xdf_evtchn); 1103 #else /* !XPV_HVM_DRIVER */ 1104 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1105 #endif /* !XPV_HVM_DRIVER */ 1106 1107 /* 1108 * Drain any requests in the ring. We need to do this before we 1109 * can free grant table entries, because if active ring entries 1110 * point to grants, then the backend could be trying to access 1111 * those grants. 1112 */ 1113 (void) xdf_ring_drain_locked(vdp); 1114 1115 /* We're done talking to the backend so free up our event channel */ 1116 xvdi_free_evtchn(vdp->xdf_dip); 1117 vdp->xdf_evtchn = INVALID_EVTCHN; 1118 1119 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1120 bp = vreq->v_buf; 1121 ASSERT(BP_VREQ(bp) == vreq); 1122 1123 /* Free up any grant table entries associaed with this IO */ 1124 while ((gs = list_head(&vreq->v_gs)) != NULL) 1125 gs_free(gs); 1126 1127 /* If this IO was on the runq, move it back to the waitq. 
*/ 1128 if (vreq->v_runq) 1129 xdf_kstat_runq_to_waitq(vdp, bp); 1130 1131 /* 1132 * Reset any buf IO state since we're going to re-issue the 1133 * IO when we reconnect. 1134 */ 1135 vreq_free(vdp, vreq); 1136 BP_VREQ_SET(bp, NULL); 1137 bioerror(bp, 0); 1138 } 1139 1140 /* reset the active queue index pointer */ 1141 vdp->xdf_i_act = vdp->xdf_f_act; 1142 1143 /* Destroy the ring */ 1144 xvdi_free_ring(vdp->xdf_xb_ring); 1145 vdp->xdf_xb_ring = NULL; 1146 vdp->xdf_xb_ring_hdl = NULL; 1147 vdp->xdf_peer = INVALID_DOMID; 1148 } 1149 1150 void 1151 xdfmin(struct buf *bp) 1152 { 1153 if (bp->b_bcount > xdf_maxphys) 1154 bp->b_bcount = xdf_maxphys; 1155 } 1156 1157 /* 1158 * Check if we have a pending "eject" media request. 1159 */ 1160 static int 1161 xdf_eject_pending(xdf_t *vdp) 1162 { 1163 dev_info_t *dip = vdp->xdf_dip; 1164 char *xsname, *str; 1165 1166 if (!vdp->xdf_media_req_supported) 1167 return (B_FALSE); 1168 1169 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1170 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1171 return (B_FALSE); 1172 1173 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1174 strfree(str); 1175 return (B_FALSE); 1176 } 1177 strfree(str); 1178 return (B_TRUE); 1179 } 1180 1181 /* 1182 * Generate a media request. 1183 */ 1184 static int 1185 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1186 { 1187 dev_info_t *dip = vdp->xdf_dip; 1188 char *xsname; 1189 1190 /* 1191 * we can't be holding xdf_dev_lk because xenbus_printf() can 1192 * block while waiting for a PIL 1 interrupt message. this 1193 * would cause a deadlock with xdf_intr() which needs to grab 1194 * xdf_dev_lk as well and runs at PIL 5. 
1195 */ 1196 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1197 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1198 1199 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1200 return (ENXIO); 1201 1202 /* Check if we support media requests */ 1203 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1204 return (ENOTTY); 1205 1206 /* If an eject is pending then don't allow any new requests */ 1207 if (xdf_eject_pending(vdp)) 1208 return (ENXIO); 1209 1210 /* Make sure that there is media present */ 1211 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1212 return (ENXIO); 1213 1214 /* We only allow operations when the device is ready and connected */ 1215 if (vdp->xdf_state != XD_READY) 1216 return (EIO); 1217 1218 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1219 return (EIO); 1220 1221 return (0); 1222 } 1223 1224 /* 1225 * populate a single blkif_request_t w/ a buf 1226 */ 1227 static void 1228 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1229 { 1230 grant_ref_t gr; 1231 uint8_t fsect, lsect; 1232 size_t bcnt; 1233 paddr_t dma_addr; 1234 off_t blk_off; 1235 dev_info_t *dip = vdp->xdf_dip; 1236 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1237 v_req_t *vreq = BP_VREQ(bp); 1238 uint64_t blkno = vreq->v_blkno; 1239 uint_t ndmacs = vreq->v_ndmacs; 1240 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1241 int seg = 0; 1242 int isread = IS_READ(bp); 1243 ge_slot_t *gs = list_head(&vreq->v_gs); 1244 1245 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1246 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1247 1248 if (isread) 1249 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1250 else { 1251 switch (vreq->v_flush_diskcache) { 1252 case FLUSH_DISKCACHE: 1253 ddi_put8(acchdl, &rreq->operation, 1254 BLKIF_OP_FLUSH_DISKCACHE); 1255 ddi_put16(acchdl, &rreq->handle, vdev); 1256 ddi_put64(acchdl, &rreq->id, 1257 (uint64_t)(uintptr_t)(gs)); 1258 ddi_put8(acchdl, &rreq->nr_segments, 0); 1259 vreq->v_status = VREQ_DMAWIN_DONE; 1260 return; 1261 case 
WRITE_BARRIER: 1262 ddi_put8(acchdl, &rreq->operation, 1263 BLKIF_OP_WRITE_BARRIER); 1264 break; 1265 default: 1266 if (!vdp->xdf_wce) 1267 ddi_put8(acchdl, &rreq->operation, 1268 BLKIF_OP_WRITE_BARRIER); 1269 else 1270 ddi_put8(acchdl, &rreq->operation, 1271 BLKIF_OP_WRITE); 1272 break; 1273 } 1274 } 1275 1276 ddi_put16(acchdl, &rreq->handle, vdev); 1277 ddi_put64(acchdl, &rreq->sector_number, blkno); 1278 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1279 1280 /* 1281 * loop until all segments are populated or no more dma cookie in buf 1282 */ 1283 for (;;) { 1284 /* 1285 * Each segment of a blkif request can transfer up to 1286 * one 4K page of data. 1287 */ 1288 bcnt = vreq->v_dmac.dmac_size; 1289 dma_addr = vreq->v_dmac.dmac_laddress; 1290 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1291 fsect = blk_off >> XB_BSHIFT; 1292 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1293 1294 ASSERT(bcnt <= PAGESIZE); 1295 ASSERT((bcnt % XB_BSIZE) == 0); 1296 ASSERT((blk_off & XB_BMASK) == 0); 1297 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1298 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1299 1300 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1301 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1302 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1303 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1304 1305 DPRINTF(IO_DBG, ( 1306 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1307 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1308 DPRINTF(IO_DBG, ( 1309 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1310 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1311 1312 blkno += (bcnt >> XB_BSHIFT); 1313 seg++; 1314 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1315 if (--ndmacs) { 1316 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1317 continue; 1318 } 1319 1320 vreq->v_status = VREQ_DMAWIN_DONE; 1321 vreq->v_blkno = blkno; 1322 break; 1323 } 1324 ddi_put8(acchdl, &rreq->nr_segments, seg); 1325 DPRINTF(IO_DBG, ( 1326 "xdf@%s: xdf_process_rreq: 
request id=%"PRIx64" ready\n", 1327 vdp->xdf_addr, rreq->id)); 1328 } 1329 1330 static void 1331 xdf_io_start(xdf_t *vdp) 1332 { 1333 struct buf *bp; 1334 v_req_t *vreq; 1335 blkif_request_t *rreq; 1336 boolean_t rreqready = B_FALSE; 1337 1338 mutex_enter(&vdp->xdf_dev_lk); 1339 1340 /* 1341 * Populate the ring request(s). Loop until there is no buf to 1342 * transfer or no free slot available in I/O ring. 1343 */ 1344 for (;;) { 1345 /* don't start any new IO if we're suspending */ 1346 if (vdp->xdf_suspending) 1347 break; 1348 if ((bp = xdf_bp_next(vdp)) == NULL) 1349 break; 1350 1351 /* if the buf doesn't already have a vreq, allocate one */ 1352 if (((vreq = BP_VREQ(bp)) == NULL) && 1353 ((vreq = vreq_get(vdp, bp)) == NULL)) 1354 break; 1355 1356 /* alloc DMA/GTE resources */ 1357 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1358 break; 1359 1360 /* get next blkif_request in the ring */ 1361 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1362 break; 1363 bzero(rreq, sizeof (blkif_request_t)); 1364 rreqready = B_TRUE; 1365 1366 /* populate blkif_request with this buf */ 1367 xdf_process_rreq(vdp, bp, rreq); 1368 1369 /* 1370 * This buffer/vreq pair is has been allocated a ring buffer 1371 * resources, so if it isn't already in our runq, add it. 
1372 */ 1373 if (!vreq->v_runq) 1374 xdf_kstat_waitq_to_runq(vdp, bp); 1375 } 1376 1377 /* Send the request(s) to the backend */ 1378 if (rreqready) 1379 xdf_ring_push(vdp); 1380 1381 mutex_exit(&vdp->xdf_dev_lk); 1382 } 1383 1384 1385 /* check if partition is open, -1 - check all partitions on the disk */ 1386 static boolean_t 1387 xdf_isopen(xdf_t *vdp, int partition) 1388 { 1389 int i; 1390 ulong_t parbit; 1391 boolean_t rval = B_FALSE; 1392 1393 ASSERT((partition == -1) || 1394 ((partition >= 0) || (partition < XDF_PEXT))); 1395 1396 if (partition == -1) 1397 parbit = (ulong_t)-1; 1398 else 1399 parbit = 1 << partition; 1400 1401 for (i = 0; i < OTYPCNT; i++) { 1402 if (vdp->xdf_vd_open[i] & parbit) 1403 rval = B_TRUE; 1404 } 1405 1406 return (rval); 1407 } 1408 1409 /* 1410 * The connection should never be closed as long as someone is holding 1411 * us open, there is pending IO, or someone is waiting waiting for a 1412 * connection. 1413 */ 1414 static boolean_t 1415 xdf_busy(xdf_t *vdp) 1416 { 1417 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1418 1419 if ((vdp->xdf_xb_ring != NULL) && 1420 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1421 ASSERT(vdp->xdf_state != XD_CLOSED); 1422 return (B_TRUE); 1423 } 1424 1425 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1426 ASSERT(vdp->xdf_state != XD_CLOSED); 1427 return (B_TRUE); 1428 } 1429 1430 if (xdf_isopen(vdp, -1)) { 1431 ASSERT(vdp->xdf_state != XD_CLOSED); 1432 return (B_TRUE); 1433 } 1434 1435 if (vdp->xdf_connect_req > 0) { 1436 ASSERT(vdp->xdf_state != XD_CLOSED); 1437 return (B_TRUE); 1438 } 1439 1440 return (B_FALSE); 1441 } 1442 1443 static void 1444 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1445 { 1446 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1447 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1448 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1449 vdp->xdf_addr, vdp->xdf_state, new_state)); 1450 vdp->xdf_state = new_state; 1451 cv_broadcast(&vdp->xdf_dev_cv); 1452 } 1453 
1454 static void 1455 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1456 { 1457 dev_info_t *dip = vdp->xdf_dip; 1458 boolean_t busy; 1459 1460 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1461 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1462 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1463 1464 /* Check if we're already there. */ 1465 if (vdp->xdf_state == new_state) 1466 return; 1467 1468 mutex_enter(&vdp->xdf_dev_lk); 1469 busy = xdf_busy(vdp); 1470 1471 /* If we're already closed then there's nothing todo. */ 1472 if (vdp->xdf_state == XD_CLOSED) { 1473 ASSERT(!busy); 1474 xdf_set_state(vdp, new_state); 1475 mutex_exit(&vdp->xdf_dev_lk); 1476 return; 1477 } 1478 1479 #ifdef DEBUG 1480 /* UhOh. Warn the user that something bad has happened. */ 1481 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1482 (vdp->xdf_xdev_nblocks != 0)) { 1483 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1484 vdp->xdf_addr); 1485 } 1486 #endif /* DEBUG */ 1487 1488 xdf_ring_destroy(vdp); 1489 1490 /* If we're busy then we can only go into the unknown state */ 1491 xdf_set_state(vdp, (busy) ? 
XD_UNKNOWN : new_state); 1492 mutex_exit(&vdp->xdf_dev_lk); 1493 1494 /* if we're closed now, let the other end know */ 1495 if (vdp->xdf_state == XD_CLOSED) 1496 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1497 } 1498 1499 1500 /* 1501 * Kick-off connect process 1502 * Status should be XD_UNKNOWN or XD_CLOSED 1503 * On success, status will be changed to XD_INIT 1504 * On error, it will be changed to XD_UNKNOWN 1505 */ 1506 static int 1507 xdf_setstate_init(xdf_t *vdp) 1508 { 1509 dev_info_t *dip = vdp->xdf_dip; 1510 xenbus_transaction_t xbt; 1511 grant_ref_t gref; 1512 char *xsname, *str; 1513 int rv; 1514 1515 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1516 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1517 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1518 (vdp->xdf_state == XD_CLOSED)); 1519 1520 DPRINTF(DDI_DBG, 1521 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1522 1523 /* 1524 * If an eject is pending then don't allow a new connection. 1525 * (Only the backend can clear media request eject request.) 1526 */ 1527 if (xdf_eject_pending(vdp)) 1528 return (DDI_FAILURE); 1529 1530 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1531 goto errout; 1532 1533 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1534 goto errout; 1535 1536 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1537 1538 /* 1539 * Sanity check for the existance of the xenbus device-type property. 1540 * This property might not exist if we our xenbus device nodes was 1541 * force destroyed while we were still connected to the backend. 
1542 */ 1543 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1544 goto errout; 1545 strfree(str); 1546 1547 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1548 goto errout; 1549 1550 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1551 #ifdef XPV_HVM_DRIVER 1552 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1553 #else /* !XPV_HVM_DRIVER */ 1554 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1555 DDI_SUCCESS) { 1556 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1557 "failed to add intr handler", vdp->xdf_addr); 1558 goto errout1; 1559 } 1560 #endif /* !XPV_HVM_DRIVER */ 1561 1562 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1563 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1564 DDI_SUCCESS) { 1565 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1566 vdp->xdf_addr); 1567 goto errout2; 1568 } 1569 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1570 1571 /* 1572 * Write into xenstore the info needed by backend 1573 */ 1574 trans_retry: 1575 if (xenbus_transaction_start(&xbt)) { 1576 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1577 vdp->xdf_addr); 1578 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1579 goto fail_trans; 1580 } 1581 1582 /* 1583 * XBP_PROTOCOL is written by the domain builder in the case of PV 1584 * domains. However, it is not written for HVM domains, so let's 1585 * write it here. 
1586 */ 1587 if (((rv = xenbus_printf(xbt, xsname, 1588 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1589 ((rv = xenbus_printf(xbt, xsname, 1590 XBP_RING_REF, "%u", gref)) != 0) || 1591 ((rv = xenbus_printf(xbt, xsname, 1592 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1593 ((rv = xenbus_printf(xbt, xsname, 1594 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1595 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1596 (void) xenbus_transaction_end(xbt, 1); 1597 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1598 goto fail_trans; 1599 } 1600 1601 /* kick-off connect process */ 1602 if (rv = xenbus_transaction_end(xbt, 0)) { 1603 if (rv == EAGAIN) 1604 goto trans_retry; 1605 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1606 goto fail_trans; 1607 } 1608 1609 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1610 mutex_enter(&vdp->xdf_dev_lk); 1611 xdf_set_state(vdp, XD_INIT); 1612 mutex_exit(&vdp->xdf_dev_lk); 1613 1614 return (DDI_SUCCESS); 1615 1616 fail_trans: 1617 xvdi_free_ring(vdp->xdf_xb_ring); 1618 errout2: 1619 #ifdef XPV_HVM_DRIVER 1620 ec_unbind_evtchn(vdp->xdf_evtchn); 1621 #else /* !XPV_HVM_DRIVER */ 1622 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1623 #endif /* !XPV_HVM_DRIVER */ 1624 errout1: 1625 xvdi_free_evtchn(dip); 1626 vdp->xdf_evtchn = INVALID_EVTCHN; 1627 errout: 1628 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1629 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1630 vdp->xdf_addr); 1631 return (DDI_FAILURE); 1632 } 1633 1634 int 1635 xdf_get_flush_block(xdf_t *vdp) 1636 { 1637 /* 1638 * Get a DEV_BSIZE aligned bufer 1639 */ 1640 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1641 vdp->xdf_cache_flush_block = 1642 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1643 (int)vdp->xdf_xdev_secsize); 1644 1645 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1646 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1647 return (DDI_FAILURE); 1648 
return (DDI_SUCCESS); 1649 } 1650 1651 static void 1652 xdf_setstate_ready(void *arg) 1653 { 1654 xdf_t *vdp = (xdf_t *)arg; 1655 1656 vdp->xdf_ready_tq_thread = curthread; 1657 1658 /* 1659 * We've created all the minor nodes via cmlb_attach() using default 1660 * value in xdf_attach() to make it possible to block in xdf_open(), 1661 * in case there's anyone (say, booting thread) ever trying to open 1662 * it before connected to backend. We will refresh all those minor 1663 * nodes w/ latest info we've got now when we are almost connected. 1664 */ 1665 mutex_enter(&vdp->xdf_dev_lk); 1666 if (vdp->xdf_cmbl_reattach) { 1667 vdp->xdf_cmbl_reattach = B_FALSE; 1668 1669 mutex_exit(&vdp->xdf_dev_lk); 1670 if (xdf_cmlb_attach(vdp) != 0) { 1671 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1672 return; 1673 } 1674 mutex_enter(&vdp->xdf_dev_lk); 1675 } 1676 1677 /* If we're not still trying to get to the ready state, then bail. */ 1678 if (vdp->xdf_state != XD_CONNECTED) { 1679 mutex_exit(&vdp->xdf_dev_lk); 1680 return; 1681 } 1682 mutex_exit(&vdp->xdf_dev_lk); 1683 1684 /* 1685 * If backend has feature-barrier, see if it supports disk 1686 * cache flush op. 1687 */ 1688 vdp->xdf_flush_supported = B_FALSE; 1689 if (vdp->xdf_feature_barrier) { 1690 /* 1691 * Pretend we already know flush is supported so probe 1692 * will attempt the correct op. 1693 */ 1694 vdp->xdf_flush_supported = B_TRUE; 1695 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1696 vdp->xdf_flush_supported = B_TRUE; 1697 } else { 1698 vdp->xdf_flush_supported = B_FALSE; 1699 /* 1700 * If the other end does not support the cache flush op 1701 * then we must use a barrier-write to force disk 1702 * cache flushing. Barrier writes require that a data 1703 * block actually be written. 1704 * Cache a block to barrier-write when we are 1705 * asked to perform a flush. 1706 * XXX - would it be better to just copy 1 block 1707 * (512 bytes) from whatever write we did last 1708 * and rewrite that block? 
1709 */ 1710 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1711 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1712 return; 1713 } 1714 } 1715 } 1716 1717 mutex_enter(&vdp->xdf_cb_lk); 1718 mutex_enter(&vdp->xdf_dev_lk); 1719 if (vdp->xdf_state == XD_CONNECTED) 1720 xdf_set_state(vdp, XD_READY); 1721 mutex_exit(&vdp->xdf_dev_lk); 1722 1723 /* Restart any currently queued up io */ 1724 xdf_io_start(vdp); 1725 1726 mutex_exit(&vdp->xdf_cb_lk); 1727 } 1728 1729 /* 1730 * synthetic geometry 1731 */ 1732 #define XDF_NSECTS 256 1733 #define XDF_NHEADS 16 1734 1735 static void 1736 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1737 { 1738 xdf_t *vdp; 1739 uint_t ncyl; 1740 1741 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1742 1743 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1744 1745 bzero(geomp, sizeof (*geomp)); 1746 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1747 geomp->g_acyl = 0; 1748 geomp->g_nhead = XDF_NHEADS; 1749 geomp->g_nsect = XDF_NSECTS; 1750 geomp->g_secsize = vdp->xdf_xdev_secsize; 1751 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1752 geomp->g_intrlv = 0; 1753 geomp->g_rpm = 7200; 1754 } 1755 1756 /* 1757 * Finish other initialization after we've connected to backend 1758 * Status should be XD_INIT before calling this routine 1759 * On success, status should be changed to XD_CONNECTED. 
1760 * On error, status should stay XD_INIT 1761 */ 1762 static int 1763 xdf_setstate_connected(xdf_t *vdp) 1764 { 1765 dev_info_t *dip = vdp->xdf_dip; 1766 cmlb_geom_t pgeom; 1767 diskaddr_t nblocks = 0; 1768 uint_t secsize = 0; 1769 char *oename, *xsname, *str; 1770 uint_t dinfo; 1771 1772 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1773 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1774 ASSERT(vdp->xdf_state == XD_INIT); 1775 1776 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1777 ((oename = xvdi_get_oename(dip)) == NULL)) 1778 return (DDI_FAILURE); 1779 1780 /* Make sure the other end is XenbusStateConnected */ 1781 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1782 return (DDI_FAILURE); 1783 1784 /* Determine if feature barrier is supported by backend */ 1785 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1786 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1787 vdp->xdf_addr); 1788 1789 /* 1790 * Probe backend. Read the device size into xdf_xdev_nblocks 1791 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1792 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1793 * we always set VDISK_CDROM, regardless of if it's present in 1794 * the xenbus info parameter. 
1795 */ 1796 if (xenbus_gather(XBT_NULL, oename, 1797 XBP_SECTORS, "%"SCNu64, &nblocks, 1798 XBP_SECTOR_SIZE, "%u", &secsize, 1799 XBP_INFO, "%u", &dinfo, 1800 NULL) != 0) { 1801 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1802 "cannot read backend info", vdp->xdf_addr); 1803 return (DDI_FAILURE); 1804 } 1805 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1806 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1807 vdp->xdf_addr); 1808 return (DDI_FAILURE); 1809 } 1810 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1811 dinfo |= VDISK_CDROM; 1812 strfree(str); 1813 1814 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1815 secsize = DEV_BSIZE; 1816 vdp->xdf_xdev_nblocks = nblocks; 1817 vdp->xdf_xdev_secsize = secsize; 1818 #ifdef _ILP32 1819 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1820 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1821 "backend disk device too large with %llu blocks for" 1822 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1823 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1824 return (DDI_FAILURE); 1825 } 1826 #endif 1827 1828 /* 1829 * If the physical geometry for a fixed disk has been explicity 1830 * set then make sure that the specified physical geometry isn't 1831 * larger than the device we connected to. 
1832 */ 1833 if (vdp->xdf_pgeom_fixed && 1834 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1835 cmn_err(CE_WARN, 1836 "xdf@%s: connect failed, fixed geometry too large", 1837 vdp->xdf_addr); 1838 return (DDI_FAILURE); 1839 } 1840 1841 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1842 1843 /* mark vbd is ready for I/O */ 1844 mutex_enter(&vdp->xdf_dev_lk); 1845 xdf_set_state(vdp, XD_CONNECTED); 1846 1847 /* check if the cmlb label should be updated */ 1848 xdf_synthetic_pgeom(dip, &pgeom); 1849 if ((vdp->xdf_dinfo != dinfo) || 1850 (!vdp->xdf_pgeom_fixed && 1851 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1852 vdp->xdf_cmbl_reattach = B_TRUE; 1853 1854 vdp->xdf_dinfo = dinfo; 1855 if (!vdp->xdf_pgeom_fixed) 1856 vdp->xdf_pgeom = pgeom; 1857 } 1858 1859 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1860 if (vdp->xdf_xdev_nblocks == 0) { 1861 vdp->xdf_mstate = DKIO_EJECTED; 1862 cv_broadcast(&vdp->xdf_mstate_cv); 1863 } else { 1864 vdp->xdf_mstate = DKIO_INSERTED; 1865 cv_broadcast(&vdp->xdf_mstate_cv); 1866 } 1867 } else { 1868 if (vdp->xdf_mstate != DKIO_NONE) { 1869 vdp->xdf_mstate = DKIO_NONE; 1870 cv_broadcast(&vdp->xdf_mstate_cv); 1871 } 1872 } 1873 1874 mutex_exit(&vdp->xdf_dev_lk); 1875 1876 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1877 (uint64_t)vdp->xdf_xdev_nblocks); 1878 1879 /* Restart any currently queued up io */ 1880 xdf_io_start(vdp); 1881 1882 /* 1883 * To get to the ready state we have to do IO to the backend device, 1884 * but we can't initiate IO from the other end change callback thread 1885 * (which is the current context we're executing in.) This is because 1886 * if the other end disconnects while we're doing IO from the callback 1887 * thread, then we can't recieve that disconnect event and we hang 1888 * waiting for an IO that can never complete. 
1889 */ 1890 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1891 DDI_SLEEP); 1892 1893 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1894 return (DDI_SUCCESS); 1895 } 1896 1897 /*ARGSUSED*/ 1898 static void 1899 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1900 { 1901 XenbusState new_state = *(XenbusState *)impl_data; 1902 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1903 1904 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1905 vdp->xdf_addr, new_state)); 1906 1907 mutex_enter(&vdp->xdf_cb_lk); 1908 1909 /* We assume that this callback is single threaded */ 1910 ASSERT(vdp->xdf_oe_change_thread == NULL); 1911 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1912 1913 /* ignore any backend state changes if we're suspending/suspended */ 1914 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1915 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1916 mutex_exit(&vdp->xdf_cb_lk); 1917 return; 1918 } 1919 1920 switch (new_state) { 1921 case XenbusStateUnknown: 1922 case XenbusStateInitialising: 1923 case XenbusStateInitWait: 1924 case XenbusStateInitialised: 1925 if (vdp->xdf_state == XD_INIT) 1926 break; 1927 1928 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1929 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1930 break; 1931 ASSERT(vdp->xdf_state == XD_INIT); 1932 break; 1933 1934 case XenbusStateConnected: 1935 if ((vdp->xdf_state == XD_CONNECTED) || 1936 (vdp->xdf_state == XD_READY)) 1937 break; 1938 1939 if (vdp->xdf_state != XD_INIT) { 1940 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1941 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1942 break; 1943 ASSERT(vdp->xdf_state == XD_INIT); 1944 } 1945 1946 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1947 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1948 break; 1949 } 1950 ASSERT(vdp->xdf_state == XD_CONNECTED); 1951 break; 1952 1953 case XenbusStateClosing: 1954 if (xdf_isopen(vdp, -1)) { 1955 cmn_err(CE_NOTE, 1956 "xdf@%s: 
hot-unplug failed, still in use", 1957 vdp->xdf_addr); 1958 break; 1959 } 1960 /*FALLTHROUGH*/ 1961 case XenbusStateClosed: 1962 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1963 break; 1964 } 1965 1966 /* notify anybody waiting for oe state change */ 1967 cv_broadcast(&vdp->xdf_dev_cv); 1968 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1969 mutex_exit(&vdp->xdf_cb_lk); 1970 } 1971 1972 static int 1973 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1974 { 1975 int rv, timeouts = 0, reset = 20; 1976 1977 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1978 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1979 1980 /* we can't connect once we're in the closed state */ 1981 if (vdp->xdf_state == XD_CLOSED) 1982 return (XD_CLOSED); 1983 1984 vdp->xdf_connect_req++; 1985 while (vdp->xdf_state != XD_READY) { 1986 mutex_exit(&vdp->xdf_dev_lk); 1987 1988 /* only one thread at a time can be the connection thread */ 1989 if (vdp->xdf_connect_thread == NULL) 1990 vdp->xdf_connect_thread = curthread; 1991 1992 if (vdp->xdf_connect_thread == curthread) { 1993 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 1994 /* 1995 * If we haven't establised a connection 1996 * within the reset time, then disconnect 1997 * so we can try again, and double the reset 1998 * time. The reset time starts at 2 sec. 
1999 */ 2000 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2001 reset *= 2; 2002 } 2003 if (vdp->xdf_state == XD_UNKNOWN) 2004 (void) xdf_setstate_init(vdp); 2005 if (vdp->xdf_state == XD_INIT) 2006 (void) xdf_setstate_connected(vdp); 2007 } 2008 2009 mutex_enter(&vdp->xdf_dev_lk); 2010 if (!wait || (vdp->xdf_state == XD_READY)) 2011 goto out; 2012 2013 mutex_exit((&vdp->xdf_cb_lk)); 2014 if (vdp->xdf_connect_thread != curthread) { 2015 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2016 } else { 2017 /* delay for 0.1 sec */ 2018 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv, 2019 &vdp->xdf_dev_lk, drv_usectohz(100*1000), 2020 TR_CLOCK_TICK); 2021 if (rv == -1) 2022 timeouts++; 2023 } 2024 mutex_exit((&vdp->xdf_dev_lk)); 2025 mutex_enter((&vdp->xdf_cb_lk)); 2026 mutex_enter((&vdp->xdf_dev_lk)); 2027 if (rv == 0) 2028 goto out; 2029 } 2030 2031 out: 2032 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2033 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2034 2035 if (vdp->xdf_connect_thread == curthread) { 2036 /* 2037 * wake up someone else so they can become the connection 2038 * thread. 
2039 */ 2040 cv_signal(&vdp->xdf_dev_cv); 2041 vdp->xdf_connect_thread = NULL; 2042 } 2043 2044 /* Try to lock the media */ 2045 mutex_exit((&vdp->xdf_dev_lk)); 2046 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2047 mutex_enter((&vdp->xdf_dev_lk)); 2048 2049 vdp->xdf_connect_req--; 2050 return (vdp->xdf_state); 2051 } 2052 2053 static uint_t 2054 xdf_iorestart(caddr_t arg) 2055 { 2056 xdf_t *vdp = (xdf_t *)arg; 2057 2058 ASSERT(vdp != NULL); 2059 2060 mutex_enter(&vdp->xdf_dev_lk); 2061 ASSERT(ISDMACBON(vdp)); 2062 SETDMACBOFF(vdp); 2063 mutex_exit(&vdp->xdf_dev_lk); 2064 2065 xdf_io_start(vdp); 2066 2067 return (DDI_INTR_CLAIMED); 2068 } 2069 2070 #if defined(XPV_HVM_DRIVER) 2071 2072 typedef struct xdf_hvm_entry { 2073 list_node_t xdf_he_list; 2074 char *xdf_he_path; 2075 dev_info_t *xdf_he_dip; 2076 } xdf_hvm_entry_t; 2077 2078 static list_t xdf_hvm_list; 2079 static kmutex_t xdf_hvm_list_lock; 2080 2081 static xdf_hvm_entry_t * 2082 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2083 { 2084 xdf_hvm_entry_t *i; 2085 2086 ASSERT((path != NULL) || (dip != NULL)); 2087 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2088 2089 i = list_head(&xdf_hvm_list); 2090 while (i != NULL) { 2091 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2092 i = list_next(&xdf_hvm_list, i); 2093 continue; 2094 } 2095 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2096 i = list_next(&xdf_hvm_list, i); 2097 continue; 2098 } 2099 break; 2100 } 2101 return (i); 2102 } 2103 2104 dev_info_t * 2105 xdf_hvm_hold(const char *path) 2106 { 2107 xdf_hvm_entry_t *i; 2108 dev_info_t *dip; 2109 2110 mutex_enter(&xdf_hvm_list_lock); 2111 i = i_xdf_hvm_find(path, NULL); 2112 if (i == NULL) { 2113 mutex_exit(&xdf_hvm_list_lock); 2114 return (B_FALSE); 2115 } 2116 ndi_hold_devi(dip = i->xdf_he_dip); 2117 mutex_exit(&xdf_hvm_list_lock); 2118 return (dip); 2119 } 2120 2121 static void 2122 xdf_hvm_add(dev_info_t *dip) 2123 { 2124 xdf_hvm_entry_t *i; 2125 char *path; 2126 2127 /* figure out 
the path for the dip */ 2128 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2129 (void) ddi_pathname(dip, path); 2130 2131 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2132 i->xdf_he_dip = dip; 2133 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2134 2135 mutex_enter(&xdf_hvm_list_lock); 2136 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2137 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2138 list_insert_head(&xdf_hvm_list, i); 2139 mutex_exit(&xdf_hvm_list_lock); 2140 2141 kmem_free(path, MAXPATHLEN); 2142 } 2143 2144 static void 2145 xdf_hvm_rm(dev_info_t *dip) 2146 { 2147 xdf_hvm_entry_t *i; 2148 2149 mutex_enter(&xdf_hvm_list_lock); 2150 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2151 list_remove(&xdf_hvm_list, i); 2152 mutex_exit(&xdf_hvm_list_lock); 2153 2154 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2155 kmem_free(i, sizeof (*i)); 2156 } 2157 2158 static void 2159 xdf_hvm_init(void) 2160 { 2161 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2162 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2163 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2164 } 2165 2166 static void 2167 xdf_hvm_fini(void) 2168 { 2169 ASSERT(list_head(&xdf_hvm_list) == NULL); 2170 list_destroy(&xdf_hvm_list); 2171 mutex_destroy(&xdf_hvm_list_lock); 2172 } 2173 2174 boolean_t 2175 xdf_hvm_connect(dev_info_t *dip) 2176 { 2177 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2178 char *oename, *str; 2179 int rv; 2180 2181 mutex_enter(&vdp->xdf_cb_lk); 2182 2183 /* 2184 * Before try to establish a connection we need to wait for the 2185 * backend hotplug scripts to have run. Once they are run the 2186 * "<oename>/hotplug-status" property will be set to "connected". 2187 */ 2188 for (;;) { 2189 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2190 2191 /* 2192 * Get the xenbus path to the backend device. Note that 2193 * we can't cache this path (and we look it up on each pass 2194 * through this loop) because it could change during 2195 * suspend, resume, and migration operations. 
2196 */ 2197 if ((oename = xvdi_get_oename(dip)) == NULL) { 2198 mutex_exit(&vdp->xdf_cb_lk); 2199 return (B_FALSE); 2200 } 2201 2202 str = NULL; 2203 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2204 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2205 break; 2206 2207 if (str != NULL) 2208 strfree(str); 2209 2210 /* wait for an update to "<oename>/hotplug-status" */ 2211 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2212 /* we got interrupted by a signal */ 2213 mutex_exit(&vdp->xdf_cb_lk); 2214 return (B_FALSE); 2215 } 2216 } 2217 2218 /* Good news. The backend hotplug scripts have been run. */ 2219 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2220 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2221 strfree(str); 2222 2223 /* 2224 * If we're emulating a cd device and if the backend doesn't support 2225 * media request opreations, then we're not going to bother trying 2226 * to establish a connection for a couple reasons. First off, media 2227 * requests support is required to support operations like eject and 2228 * media locking. Second, other backend platforms like Linux don't 2229 * support hvm pv cdrom access. They don't even have a backend pv 2230 * driver for cdrom device nodes, so we don't want to block forever 2231 * waiting for a connection to a backend driver that doesn't exist. 2232 */ 2233 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2234 mutex_exit(&vdp->xdf_cb_lk); 2235 return (B_FALSE); 2236 } 2237 2238 mutex_enter(&vdp->xdf_dev_lk); 2239 rv = xdf_connect_locked(vdp, B_TRUE); 2240 mutex_exit(&vdp->xdf_dev_lk); 2241 mutex_exit(&vdp->xdf_cb_lk); 2242 2243 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2244 } 2245 2246 int 2247 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2248 { 2249 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2250 2251 /* sanity check the requested physical geometry */ 2252 mutex_enter(&vdp->xdf_dev_lk); 2253 if ((geomp->g_secsize != XB_BSIZE) || 2254 (geomp->g_capacity == 0)) { 2255 mutex_exit(&vdp->xdf_dev_lk); 2256 return (EINVAL); 2257 } 2258 2259 /* 2260 * If we've already connected to the backend device then make sure 2261 * we're not defining a physical geometry larger than our backend 2262 * device. 2263 */ 2264 if ((vdp->xdf_xdev_nblocks != 0) && 2265 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2266 mutex_exit(&vdp->xdf_dev_lk); 2267 return (EINVAL); 2268 } 2269 2270 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2271 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2272 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2273 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2274 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2275 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2276 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2277 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2278 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2279 2280 vdp->xdf_pgeom_fixed = B_TRUE; 2281 mutex_exit(&vdp->xdf_dev_lk); 2282 2283 /* force a re-validation */ 2284 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2285 2286 return (0); 2287 } 2288 2289 boolean_t 2290 xdf_is_cd(dev_info_t *dip) 2291 { 2292 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2293 boolean_t rv; 2294 2295 mutex_enter(&vdp->xdf_cb_lk); 2296 rv = XD_IS_CD(vdp); 2297 mutex_exit(&vdp->xdf_cb_lk); 2298 return (rv); 2299 } 2300 2301 boolean_t 2302 xdf_is_rm(dev_info_t *dip) 2303 { 2304 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2305 boolean_t rv; 2306 2307 mutex_enter(&vdp->xdf_cb_lk); 2308 rv = XD_IS_RM(vdp); 2309 mutex_exit(&vdp->xdf_cb_lk); 2310 return (rv); 2311 } 2312 2313 boolean_t 2314 xdf_media_req_supported(dev_info_t *dip) 2315 { 2316 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 
2317 boolean_t rv; 2318 2319 mutex_enter(&vdp->xdf_cb_lk); 2320 rv = vdp->xdf_media_req_supported; 2321 mutex_exit(&vdp->xdf_cb_lk); 2322 return (rv); 2323 } 2324 2325 #endif /* XPV_HVM_DRIVER */ 2326 2327 static int 2328 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2329 { 2330 xdf_t *vdp; 2331 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2332 2333 if (vdp == NULL) 2334 return (ENXIO); 2335 2336 mutex_enter(&vdp->xdf_dev_lk); 2337 *capp = vdp->xdf_pgeom.g_capacity; 2338 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2339 mutex_exit(&vdp->xdf_dev_lk); 2340 return (0); 2341 } 2342 2343 static int 2344 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2345 { 2346 xdf_t *vdp; 2347 2348 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2349 return (ENXIO); 2350 *geomp = vdp->xdf_pgeom; 2351 return (0); 2352 } 2353 2354 /* 2355 * No real HBA, no geometry available from it 2356 */ 2357 /*ARGSUSED*/ 2358 static int 2359 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2360 { 2361 return (EINVAL); 2362 } 2363 2364 static int 2365 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2366 { 2367 xdf_t *vdp; 2368 2369 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2370 return (ENXIO); 2371 2372 if (XD_IS_RO(vdp)) 2373 tgattributep->media_is_writable = 0; 2374 else 2375 tgattributep->media_is_writable = 1; 2376 tgattributep->media_is_rotational = 0; 2377 return (0); 2378 } 2379 2380 /* ARGSUSED3 */ 2381 int 2382 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2383 { 2384 int instance; 2385 xdf_t *vdp; 2386 2387 instance = ddi_get_instance(dip); 2388 2389 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 2390 return (ENXIO); 2391 2392 switch (cmd) { 2393 case TG_GETPHYGEOM: 2394 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2395 case TG_GETVIRTGEOM: 2396 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2397 case TG_GETCAPACITY: 2398 return 
(xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2399 case TG_GETBLOCKSIZE: 2400 mutex_enter(&vdp->xdf_cb_lk); 2401 *(uint32_t *)arg = vdp->xdf_xdev_secsize; 2402 mutex_exit(&vdp->xdf_cb_lk); 2403 return (0); 2404 case TG_GETATTR: 2405 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2406 default: 2407 return (ENOTTY); 2408 } 2409 } 2410 2411 /* ARGSUSED5 */ 2412 int 2413 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2414 diskaddr_t start, size_t reqlen, void *tg_cookie) 2415 { 2416 xdf_t *vdp; 2417 struct buf *bp; 2418 int err = 0; 2419 2420 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2421 2422 /* We don't allow IO from the oe_change callback thread */ 2423 ASSERT(curthread != vdp->xdf_oe_change_thread); 2424 2425 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) 2426 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2427 return (EINVAL); 2428 2429 bp = getrbuf(KM_SLEEP); 2430 if (cmd == TG_READ) 2431 bp->b_flags = B_BUSY | B_READ; 2432 else 2433 bp->b_flags = B_BUSY | B_WRITE; 2434 2435 bp->b_un.b_addr = bufp; 2436 bp->b_bcount = reqlen; 2437 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); 2438 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2439 2440 mutex_enter(&vdp->xdf_dev_lk); 2441 xdf_bp_push(vdp, bp); 2442 mutex_exit(&vdp->xdf_dev_lk); 2443 xdf_io_start(vdp); 2444 if (curthread == vdp->xdf_ready_tq_thread) 2445 (void) xdf_ring_drain(vdp); 2446 err = biowait(bp); 2447 ASSERT(bp->b_flags & B_DONE); 2448 freerbuf(bp); 2449 return (err); 2450 } 2451 2452 /* 2453 * Lock the current media. Set the media state to "lock". 2454 * (Media locks are only respected by the backend driver.) 2455 */ 2456 static int 2457 xdf_ioctl_mlock(xdf_t *vdp) 2458 { 2459 int rv; 2460 mutex_enter(&vdp->xdf_cb_lk); 2461 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2462 mutex_exit(&vdp->xdf_cb_lk); 2463 return (rv); 2464 } 2465 2466 /* 2467 * Release a media lock. Set the media state to "none". 
2468 */ 2469 static int 2470 xdf_ioctl_munlock(xdf_t *vdp) 2471 { 2472 int rv; 2473 mutex_enter(&vdp->xdf_cb_lk); 2474 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2475 mutex_exit(&vdp->xdf_cb_lk); 2476 return (rv); 2477 } 2478 2479 /* 2480 * Eject the current media. Ignores any media locks. (Media locks 2481 * are only for benifit of the the backend.) 2482 */ 2483 static int 2484 xdf_ioctl_eject(xdf_t *vdp) 2485 { 2486 int rv; 2487 2488 mutex_enter(&vdp->xdf_cb_lk); 2489 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2490 mutex_exit(&vdp->xdf_cb_lk); 2491 return (rv); 2492 } 2493 2494 /* 2495 * We've set the media requests xenbus parameter to eject, so now 2496 * disconnect from the backend, wait for the backend to clear 2497 * the media requets xenbus paramter, and then we can reconnect 2498 * to the backend. 2499 */ 2500 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2501 mutex_enter(&vdp->xdf_dev_lk); 2502 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2503 mutex_exit(&vdp->xdf_dev_lk); 2504 mutex_exit(&vdp->xdf_cb_lk); 2505 return (EIO); 2506 } 2507 mutex_exit(&vdp->xdf_dev_lk); 2508 mutex_exit(&vdp->xdf_cb_lk); 2509 return (0); 2510 } 2511 2512 /* 2513 * Watch for media state changes. This can be an insertion of a device 2514 * (triggered by a 'xm block-configure' request in another domain) or 2515 * the ejection of a device (triggered by a local "eject" operation). 2516 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 
2517 */ 2518 static int 2519 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2520 { 2521 enum dkio_state prev_state; 2522 2523 mutex_enter(&vdp->xdf_cb_lk); 2524 prev_state = vdp->xdf_mstate; 2525 2526 if (vdp->xdf_mstate == mstate) { 2527 while (vdp->xdf_mstate == prev_state) { 2528 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2529 &vdp->xdf_cb_lk) == 0) { 2530 mutex_exit(&vdp->xdf_cb_lk); 2531 return (EINTR); 2532 } 2533 } 2534 } 2535 2536 if ((prev_state != DKIO_INSERTED) && 2537 (vdp->xdf_mstate == DKIO_INSERTED)) { 2538 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2539 mutex_exit(&vdp->xdf_cb_lk); 2540 return (0); 2541 } 2542 2543 mutex_exit(&vdp->xdf_cb_lk); 2544 return (0); 2545 } 2546 2547 /*ARGSUSED*/ 2548 static int 2549 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2550 int *rvalp) 2551 { 2552 minor_t minor = getminor(dev); 2553 int part = XDF_PART(minor); 2554 xdf_t *vdp; 2555 int rv; 2556 2557 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2558 (!xdf_isopen(vdp, part))) 2559 return (ENXIO); 2560 2561 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2562 vdp->xdf_addr, cmd, cmd)); 2563 2564 switch (cmd) { 2565 default: 2566 return (ENOTTY); 2567 case DKIOCG_PHYGEOM: 2568 case DKIOCG_VIRTGEOM: 2569 case DKIOCGGEOM: 2570 case DKIOCSGEOM: 2571 case DKIOCGAPART: 2572 case DKIOCSAPART: 2573 case DKIOCGVTOC: 2574 case DKIOCSVTOC: 2575 case DKIOCPARTINFO: 2576 case DKIOCGEXTVTOC: 2577 case DKIOCSEXTVTOC: 2578 case DKIOCEXTPARTINFO: 2579 case DKIOCGMBOOT: 2580 case DKIOCSMBOOT: 2581 case DKIOCGETEFI: 2582 case DKIOCSETEFI: 2583 case DKIOCSETEXTPART: 2584 case DKIOCPARTITION: 2585 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2586 rvalp, NULL)); 2587 case FDEJECT: 2588 case DKIOCEJECT: 2589 case CDROMEJECT: 2590 return (xdf_ioctl_eject(vdp)); 2591 case DKIOCLOCK: 2592 return (xdf_ioctl_mlock(vdp)); 2593 case DKIOCUNLOCK: 2594 return (xdf_ioctl_munlock(vdp)); 2595 case CDROMREADOFFSET: { 2596 
int offset = 0; 2597 if (!XD_IS_CD(vdp)) 2598 return (ENOTTY); 2599 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2600 return (EFAULT); 2601 return (0); 2602 } 2603 case DKIOCGMEDIAINFO: { 2604 struct dk_minfo media_info; 2605 2606 media_info.dki_lbsize = vdp->xdf_xdev_secsize; 2607 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2608 if (XD_IS_CD(vdp)) 2609 media_info.dki_media_type = DK_CDROM; 2610 else 2611 media_info.dki_media_type = DK_FIXED_DISK; 2612 2613 if (ddi_copyout(&media_info, (void *)arg, 2614 sizeof (struct dk_minfo), mode)) 2615 return (EFAULT); 2616 return (0); 2617 } 2618 case DKIOCINFO: { 2619 struct dk_cinfo info; 2620 2621 /* controller information */ 2622 if (XD_IS_CD(vdp)) 2623 info.dki_ctype = DKC_CDROM; 2624 else 2625 info.dki_ctype = DKC_VBD; 2626 2627 info.dki_cnum = 0; 2628 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2629 2630 /* unit information */ 2631 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2632 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2633 info.dki_flags = DKI_FMTVOL; 2634 info.dki_partition = part; 2635 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2636 info.dki_addr = 0; 2637 info.dki_space = 0; 2638 info.dki_prio = 0; 2639 info.dki_vec = 0; 2640 2641 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2642 return (EFAULT); 2643 return (0); 2644 } 2645 case DKIOCSTATE: { 2646 enum dkio_state mstate; 2647 2648 if (ddi_copyin((void *)arg, &mstate, 2649 sizeof (mstate), mode) != 0) 2650 return (EFAULT); 2651 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2652 return (rv); 2653 mstate = vdp->xdf_mstate; 2654 if (ddi_copyout(&mstate, (void *)arg, 2655 sizeof (mstate), mode) != 0) 2656 return (EFAULT); 2657 return (0); 2658 } 2659 case DKIOCREMOVABLE: { 2660 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2661 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2662 return (EFAULT); 2663 return (0); 2664 } 2665 case DKIOCGETWCE: { 2666 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2667 if (ddi_copyout(&i, 
(void *)arg, sizeof (i), mode)) 2668 return (EFAULT); 2669 return (0); 2670 } 2671 case DKIOCSETWCE: { 2672 int i; 2673 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2674 return (EFAULT); 2675 vdp->xdf_wce = VOID2BOOLEAN(i); 2676 return (0); 2677 } 2678 case DKIOCFLUSHWRITECACHE: { 2679 struct dk_callback *dkc = (struct dk_callback *)arg; 2680 2681 if (vdp->xdf_flush_supported) { 2682 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2683 NULL, 0, 0, (void *)dev); 2684 } else if (vdp->xdf_feature_barrier && 2685 !xdf_barrier_flush_disable) { 2686 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2687 vdp->xdf_cache_flush_block, xdf_flush_block, 2688 vdp->xdf_xdev_secsize, (void *)dev); 2689 } else { 2690 return (ENOTTY); 2691 } 2692 if ((mode & FKIOCTL) && (dkc != NULL) && 2693 (dkc->dkc_callback != NULL)) { 2694 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2695 /* need to return 0 after calling callback */ 2696 rv = 0; 2697 } 2698 return (rv); 2699 } 2700 } 2701 /*NOTREACHED*/ 2702 } 2703 2704 static int 2705 xdf_strategy(struct buf *bp) 2706 { 2707 xdf_t *vdp; 2708 minor_t minor; 2709 diskaddr_t p_blkct, p_blkst; 2710 daddr_t blkno; 2711 ulong_t nblks; 2712 int part; 2713 2714 minor = getminor(bp->b_edev); 2715 part = XDF_PART(minor); 2716 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2717 2718 mutex_enter(&vdp->xdf_dev_lk); 2719 if (!xdf_isopen(vdp, part)) { 2720 mutex_exit(&vdp->xdf_dev_lk); 2721 xdf_io_err(bp, ENXIO, 0); 2722 return (0); 2723 } 2724 2725 /* We don't allow IO from the oe_change callback thread */ 2726 ASSERT(curthread != vdp->xdf_oe_change_thread); 2727 2728 /* Check for writes to a read only device */ 2729 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2730 mutex_exit(&vdp->xdf_dev_lk); 2731 xdf_io_err(bp, EROFS, 0); 2732 return (0); 2733 } 2734 2735 /* Check if this I/O is accessing a partition or the entire disk */ 2736 if ((long)bp->b_private == XB_SLICE_NONE) { 2737 /* This I/O is using an absolute offset */ 2738 p_blkct = vdp->xdf_xdev_nblocks; 2739 p_blkst 
= 0; 2740 } else { 2741 /* This I/O is using a partition relative offset */ 2742 mutex_exit(&vdp->xdf_dev_lk); 2743 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2744 &p_blkst, NULL, NULL, NULL)) { 2745 xdf_io_err(bp, ENXIO, 0); 2746 return (0); 2747 } 2748 mutex_enter(&vdp->xdf_dev_lk); 2749 } 2750 2751 /* 2752 * Adjust the real blkno and bcount according to the underline 2753 * physical sector size. 2754 */ 2755 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); 2756 2757 /* check for a starting block beyond the disk or partition limit */ 2758 if (blkno > p_blkct) { 2759 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2760 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); 2761 mutex_exit(&vdp->xdf_dev_lk); 2762 xdf_io_err(bp, EINVAL, 0); 2763 return (0); 2764 } 2765 2766 /* Legacy: don't set error flag at this case */ 2767 if (blkno == p_blkct) { 2768 mutex_exit(&vdp->xdf_dev_lk); 2769 bp->b_resid = bp->b_bcount; 2770 biodone(bp); 2771 return (0); 2772 } 2773 2774 /* sanitize the input buf */ 2775 bioerror(bp, 0); 2776 bp->b_resid = 0; 2777 bp->av_back = bp->av_forw = NULL; 2778 2779 /* Adjust for partial transfer, this will result in an error later */ 2780 if (vdp->xdf_xdev_secsize != 0 && 2781 vdp->xdf_xdev_secsize != XB_BSIZE) { 2782 nblks = bp->b_bcount / vdp->xdf_xdev_secsize; 2783 } else { 2784 nblks = bp->b_bcount >> XB_BSHIFT; 2785 } 2786 2787 if ((blkno + nblks) > p_blkct) { 2788 if (vdp->xdf_xdev_secsize != 0 && 2789 vdp->xdf_xdev_secsize != XB_BSIZE) { 2790 bp->b_resid = 2791 ((blkno + nblks) - p_blkct) * 2792 vdp->xdf_xdev_secsize; 2793 } else { 2794 bp->b_resid = 2795 ((blkno + nblks) - p_blkct) << 2796 XB_BSHIFT; 2797 } 2798 bp->b_bcount -= bp->b_resid; 2799 } 2800 2801 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2802 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); 2803 2804 /* Fix up the buf struct */ 2805 bp->b_flags |= B_BUSY; 2806 bp->b_private = (void *)(uintptr_t)p_blkst; 2807 2808 
xdf_bp_push(vdp, bp); 2809 mutex_exit(&vdp->xdf_dev_lk); 2810 xdf_io_start(vdp); 2811 if (do_polled_io) 2812 (void) xdf_ring_drain(vdp); 2813 return (0); 2814 } 2815 2816 /*ARGSUSED*/ 2817 static int 2818 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2819 { 2820 xdf_t *vdp; 2821 minor_t minor; 2822 diskaddr_t p_blkcnt; 2823 int part; 2824 2825 minor = getminor(dev); 2826 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2827 return (ENXIO); 2828 2829 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2830 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2831 2832 part = XDF_PART(minor); 2833 if (!xdf_isopen(vdp, part)) 2834 return (ENXIO); 2835 2836 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2837 NULL, NULL, NULL, NULL)) 2838 return (ENXIO); 2839 2840 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2841 return (ENOSPC); 2842 2843 if (U_INVAL(uiop)) 2844 return (EINVAL); 2845 2846 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2847 } 2848 2849 /*ARGSUSED*/ 2850 static int 2851 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2852 { 2853 xdf_t *vdp; 2854 minor_t minor; 2855 diskaddr_t p_blkcnt; 2856 int part; 2857 2858 minor = getminor(dev); 2859 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2860 return (ENXIO); 2861 2862 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2863 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2864 2865 part = XDF_PART(minor); 2866 if (!xdf_isopen(vdp, part)) 2867 return (ENXIO); 2868 2869 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2870 NULL, NULL, NULL, NULL)) 2871 return (ENXIO); 2872 2873 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2874 return (ENOSPC); 2875 2876 if (U_INVAL(uiop)) 2877 return (EINVAL); 2878 2879 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2880 } 2881 2882 /*ARGSUSED*/ 2883 static int 2884 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2885 { 2886 xdf_t *vdp; 2887 minor_t minor; 2888 struct uio *uiop 
= aiop->aio_uio; 2889 diskaddr_t p_blkcnt; 2890 int part; 2891 2892 minor = getminor(dev); 2893 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2894 return (ENXIO); 2895 2896 part = XDF_PART(minor); 2897 if (!xdf_isopen(vdp, part)) 2898 return (ENXIO); 2899 2900 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2901 NULL, NULL, NULL, NULL)) 2902 return (ENXIO); 2903 2904 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2905 return (ENOSPC); 2906 2907 if (U_INVAL(uiop)) 2908 return (EINVAL); 2909 2910 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2911 } 2912 2913 /*ARGSUSED*/ 2914 static int 2915 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2916 { 2917 xdf_t *vdp; 2918 minor_t minor; 2919 struct uio *uiop = aiop->aio_uio; 2920 diskaddr_t p_blkcnt; 2921 int part; 2922 2923 minor = getminor(dev); 2924 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2925 return (ENXIO); 2926 2927 part = XDF_PART(minor); 2928 if (!xdf_isopen(vdp, part)) 2929 return (ENXIO); 2930 2931 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2932 NULL, NULL, NULL, NULL)) 2933 return (ENXIO); 2934 2935 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2936 return (ENOSPC); 2937 2938 if (U_INVAL(uiop)) 2939 return (EINVAL); 2940 2941 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2942 } 2943 2944 static int 2945 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2946 { 2947 struct buf dumpbuf, *dbp = &dumpbuf; 2948 xdf_t *vdp; 2949 minor_t minor; 2950 int err = 0; 2951 int part; 2952 diskaddr_t p_blkcnt, p_blkst; 2953 2954 minor = getminor(dev); 2955 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2956 return (ENXIO); 2957 2958 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2959 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2960 2961 /* We don't allow IO from the oe_change callback thread */ 2962 ASSERT(curthread != vdp->xdf_oe_change_thread); 2963 2964 
part = XDF_PART(minor); 2965 if (!xdf_isopen(vdp, part)) 2966 return (ENXIO); 2967 2968 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 2969 NULL, NULL, NULL)) 2970 return (ENXIO); 2971 2972 if ((blkno + nblk) > 2973 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 2974 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 2975 vdp->xdf_addr, (daddr_t)((blkno + nblk) / 2976 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 2977 return (EINVAL); 2978 } 2979 2980 bioinit(dbp); 2981 dbp->b_flags = B_BUSY; 2982 dbp->b_un.b_addr = addr; 2983 dbp->b_bcount = nblk << DEV_BSHIFT; 2984 dbp->b_blkno = blkno; 2985 dbp->b_edev = dev; 2986 dbp->b_private = (void *)(uintptr_t)p_blkst; 2987 2988 mutex_enter(&vdp->xdf_dev_lk); 2989 xdf_bp_push(vdp, dbp); 2990 mutex_exit(&vdp->xdf_dev_lk); 2991 xdf_io_start(vdp); 2992 err = xdf_ring_drain(vdp); 2993 biofini(dbp); 2994 return (err); 2995 } 2996 2997 /*ARGSUSED*/ 2998 static int 2999 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 3000 { 3001 minor_t minor; 3002 xdf_t *vdp; 3003 int part; 3004 ulong_t parbit; 3005 3006 minor = getminor(dev); 3007 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3008 return (ENXIO); 3009 3010 mutex_enter(&vdp->xdf_dev_lk); 3011 part = XDF_PART(minor); 3012 if (!xdf_isopen(vdp, part)) { 3013 mutex_exit(&vdp->xdf_dev_lk); 3014 return (ENXIO); 3015 } 3016 parbit = 1 << part; 3017 3018 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3019 if (otyp == OTYP_LYR) { 3020 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3021 if (--vdp->xdf_vd_lyropen[part] == 0) 3022 vdp->xdf_vd_open[otyp] &= ~parbit; 3023 } else { 3024 vdp->xdf_vd_open[otyp] &= ~parbit; 3025 } 3026 vdp->xdf_vd_exclopen &= ~parbit; 3027 3028 mutex_exit(&vdp->xdf_dev_lk); 3029 return (0); 3030 } 3031 3032 static int 3033 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3034 { 3035 minor_t minor; 3036 xdf_t *vdp; 3037 int part; 3038 ulong_t parbit; 3039 diskaddr_t p_blkct = 0; 3040 
boolean_t firstopen; 3041 boolean_t nodelay; 3042 3043 minor = getminor(*devp); 3044 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3045 return (ENXIO); 3046 3047 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3048 3049 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3050 3051 /* do cv_wait until connected or failed */ 3052 mutex_enter(&vdp->xdf_cb_lk); 3053 mutex_enter(&vdp->xdf_dev_lk); 3054 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3055 mutex_exit(&vdp->xdf_dev_lk); 3056 mutex_exit(&vdp->xdf_cb_lk); 3057 return (ENXIO); 3058 } 3059 mutex_exit(&vdp->xdf_cb_lk); 3060 3061 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3062 mutex_exit(&vdp->xdf_dev_lk); 3063 return (EROFS); 3064 } 3065 3066 part = XDF_PART(minor); 3067 parbit = 1 << part; 3068 if ((vdp->xdf_vd_exclopen & parbit) || 3069 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3070 mutex_exit(&vdp->xdf_dev_lk); 3071 return (EBUSY); 3072 } 3073 3074 /* are we the first one to open this node? */ 3075 firstopen = !xdf_isopen(vdp, -1); 3076 3077 if (otyp == OTYP_LYR) 3078 vdp->xdf_vd_lyropen[part]++; 3079 3080 vdp->xdf_vd_open[otyp] |= parbit; 3081 3082 if (flag & FEXCL) 3083 vdp->xdf_vd_exclopen |= parbit; 3084 3085 mutex_exit(&vdp->xdf_dev_lk); 3086 3087 /* force a re-validation */ 3088 if (firstopen) 3089 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3090 3091 /* If this is a non-blocking open then we're done */ 3092 if (nodelay) 3093 return (0); 3094 3095 /* 3096 * This is a blocking open, so we require: 3097 * - that the disk have a valid label on it 3098 * - that the size of the partition that we're opening is non-zero 3099 */ 3100 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3101 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3102 (void) xdf_close(*devp, flag, otyp, credp); 3103 return (ENXIO); 3104 } 3105 3106 return (0); 3107 } 3108 3109 /*ARGSUSED*/ 3110 static void 3111 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3112 { 3113 xdf_t *vdp = 
(xdf_t *)ddi_get_driver_private(dip); 3114 cv_broadcast(&vdp->xdf_hp_status_cv); 3115 } 3116 3117 static int 3118 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3119 char *name, caddr_t valuep, int *lengthp) 3120 { 3121 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3122 3123 /* 3124 * Sanity check that if a dev_t or dip were specified that they 3125 * correspond to this device driver. On debug kernels we'll 3126 * panic and on non-debug kernels we'll return failure. 3127 */ 3128 ASSERT(ddi_driver_major(dip) == xdf_major); 3129 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3130 if ((ddi_driver_major(dip) != xdf_major) || 3131 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3132 return (DDI_PROP_NOT_FOUND); 3133 3134 if (vdp == NULL) 3135 return (ddi_prop_op(dev, dip, prop_op, flags, 3136 name, valuep, lengthp)); 3137 3138 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3139 dev, dip, prop_op, flags, name, valuep, lengthp, 3140 XDF_PART(getminor(dev)), NULL)); 3141 } 3142 3143 /*ARGSUSED*/ 3144 static int 3145 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3146 { 3147 int instance = XDF_INST(getminor((dev_t)arg)); 3148 xdf_t *vbdp; 3149 3150 switch (cmd) { 3151 case DDI_INFO_DEVT2DEVINFO: 3152 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3153 *rp = NULL; 3154 return (DDI_FAILURE); 3155 } 3156 *rp = vbdp->xdf_dip; 3157 return (DDI_SUCCESS); 3158 3159 case DDI_INFO_DEVT2INSTANCE: 3160 *rp = (void *)(uintptr_t)instance; 3161 return (DDI_SUCCESS); 3162 3163 default: 3164 return (DDI_FAILURE); 3165 } 3166 } 3167 3168 /*ARGSUSED*/ 3169 static int 3170 xdf_resume(dev_info_t *dip) 3171 { 3172 xdf_t *vdp; 3173 char *oename; 3174 3175 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3176 goto err; 3177 3178 if (xdf_debug & SUSRES_DBG) 3179 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3180 3181 mutex_enter(&vdp->xdf_cb_lk); 3182 3183 if 
(xvdi_resume(dip) != DDI_SUCCESS) { 3184 mutex_exit(&vdp->xdf_cb_lk); 3185 goto err; 3186 } 3187 3188 if (((oename = xvdi_get_oename(dip)) == NULL) || 3189 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3190 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3191 mutex_exit(&vdp->xdf_cb_lk); 3192 goto err; 3193 } 3194 3195 mutex_enter(&vdp->xdf_dev_lk); 3196 ASSERT(vdp->xdf_state != XD_READY); 3197 xdf_set_state(vdp, XD_UNKNOWN); 3198 mutex_exit(&vdp->xdf_dev_lk); 3199 3200 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3201 mutex_exit(&vdp->xdf_cb_lk); 3202 goto err; 3203 } 3204 3205 mutex_exit(&vdp->xdf_cb_lk); 3206 3207 if (xdf_debug & SUSRES_DBG) 3208 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3209 return (DDI_SUCCESS); 3210 err: 3211 if (xdf_debug & SUSRES_DBG) 3212 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3213 return (DDI_FAILURE); 3214 } 3215 3216 static int 3217 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3218 { 3219 int n, instance = ddi_get_instance(dip); 3220 ddi_iblock_cookie_t ibc, softibc; 3221 boolean_t dev_iscd = B_FALSE; 3222 xdf_t *vdp; 3223 char *oename, *xsname, *str; 3224 3225 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3226 "xdf_debug", 0)) != 0) 3227 xdf_debug = n; 3228 3229 switch (cmd) { 3230 case DDI_RESUME: 3231 return (xdf_resume(dip)); 3232 case DDI_ATTACH: 3233 break; 3234 default: 3235 return (DDI_FAILURE); 3236 } 3237 /* DDI_ATTACH */ 3238 3239 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 3240 ((oename = xvdi_get_oename(dip)) == NULL)) 3241 return (DDI_FAILURE); 3242 3243 /* 3244 * Disable auto-detach. This is necessary so that we don't get 3245 * detached while we're disconnected from the back end. 
3246 */ 3247 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3248 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3249 return (DDI_FAILURE); 3250 3251 /* driver handles kernel-issued IOCTLs */ 3252 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3253 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3254 return (DDI_FAILURE); 3255 3256 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3257 return (DDI_FAILURE); 3258 3259 if (ddi_get_soft_iblock_cookie(dip, 3260 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3261 return (DDI_FAILURE); 3262 3263 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3264 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3265 ddi_get_name_addr(dip)); 3266 return (DDI_FAILURE); 3267 } 3268 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3269 dev_iscd = B_TRUE; 3270 strfree(str); 3271 3272 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3273 return (DDI_FAILURE); 3274 3275 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3276 vdp = ddi_get_soft_state(xdf_ssp, instance); 3277 ddi_set_driver_private(dip, vdp); 3278 vdp->xdf_dip = dip; 3279 vdp->xdf_addr = ddi_get_name_addr(dip); 3280 vdp->xdf_suspending = B_FALSE; 3281 vdp->xdf_media_req_supported = B_FALSE; 3282 vdp->xdf_peer = INVALID_DOMID; 3283 vdp->xdf_evtchn = INVALID_EVTCHN; 3284 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3285 offsetof(v_req_t, v_link)); 3286 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3287 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3288 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3289 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3290 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3291 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3292 vdp->xdf_cmbl_reattach = B_TRUE; 3293 if (dev_iscd) { 3294 vdp->xdf_dinfo |= VDISK_CDROM; 3295 vdp->xdf_mstate = DKIO_EJECTED; 3296 } else { 3297 vdp->xdf_mstate = DKIO_NONE; 3298 } 3299 3300 if 
((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3301 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3302 goto errout0; 3303 3304 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3305 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3306 goto errout0; 3307 3308 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3309 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3310 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3311 ddi_get_name_addr(dip)); 3312 goto errout0; 3313 } 3314 3315 /* 3316 * Initialize the physical geometry stucture. Note that currently 3317 * we don't know the size of the backend device so the number 3318 * of blocks on the device will be initialized to zero. Once 3319 * we connect to the backend device we'll update the physical 3320 * geometry to reflect the real size of the device. 3321 */ 3322 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3323 vdp->xdf_pgeom_fixed = B_FALSE; 3324 3325 /* 3326 * create default device minor nodes: non-removable disk 3327 * we will adjust minor nodes after we are connected w/ backend 3328 */ 3329 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3330 if (xdf_cmlb_attach(vdp) != 0) { 3331 cmn_err(CE_WARN, 3332 "xdf@%s: attach failed, cmlb attach failed", 3333 ddi_get_name_addr(dip)); 3334 goto errout0; 3335 } 3336 3337 /* 3338 * We ship with cache-enabled disks 3339 */ 3340 vdp->xdf_wce = B_TRUE; 3341 3342 mutex_enter(&vdp->xdf_cb_lk); 3343 /* Watch backend XenbusState change */ 3344 if (xvdi_add_event_handler(dip, 3345 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3346 mutex_exit(&vdp->xdf_cb_lk); 3347 goto errout0; 3348 } 3349 3350 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3351 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3352 ddi_get_name_addr(dip)); 3353 mutex_exit(&vdp->xdf_cb_lk); 3354 goto errout1; 3355 } 3356 mutex_exit(&vdp->xdf_cb_lk); 3357 3358 #if defined(XPV_HVM_DRIVER) 3359 3360 xdf_hvm_add(dip); 3361 3362 /* Report our version to dom0. 
*/ 3363 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3364 HVMPV_XDF_VERS)) 3365 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3366 3367 #else /* !XPV_HVM_DRIVER */ 3368 3369 /* create kstat for iostat(1M) */ 3370 if (xdf_kstat_create(dip, "xdf", instance) != 0) { 3371 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3372 ddi_get_name_addr(dip)); 3373 goto errout1; 3374 } 3375 3376 #endif /* !XPV_HVM_DRIVER */ 3377 3378 ddi_report_dev(dip); 3379 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3380 return (DDI_SUCCESS); 3381 3382 errout1: 3383 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3384 xvdi_remove_event_handler(dip, XS_OE_STATE); 3385 errout0: 3386 if (vdp->xdf_vd_lbl != NULL) { 3387 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3388 cmlb_free_handle(&vdp->xdf_vd_lbl); 3389 vdp->xdf_vd_lbl = NULL; 3390 } 3391 if (vdp->xdf_softintr_id != NULL) 3392 ddi_remove_softintr(vdp->xdf_softintr_id); 3393 xvdi_remove_xb_watch_handlers(dip); 3394 if (vdp->xdf_ready_tq != NULL) 3395 ddi_taskq_destroy(vdp->xdf_ready_tq); 3396 mutex_destroy(&vdp->xdf_cb_lk); 3397 mutex_destroy(&vdp->xdf_dev_lk); 3398 cv_destroy(&vdp->xdf_dev_cv); 3399 cv_destroy(&vdp->xdf_hp_status_cv); 3400 ddi_soft_state_free(xdf_ssp, instance); 3401 ddi_set_driver_private(dip, NULL); 3402 ddi_prop_remove_all(dip); 3403 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3404 return (DDI_FAILURE); 3405 } 3406 3407 static int 3408 xdf_suspend(dev_info_t *dip) 3409 { 3410 int instance = ddi_get_instance(dip); 3411 xdf_t *vdp; 3412 3413 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3414 return (DDI_FAILURE); 3415 3416 if (xdf_debug & SUSRES_DBG) 3417 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3418 3419 xvdi_suspend(dip); 3420 3421 mutex_enter(&vdp->xdf_cb_lk); 3422 mutex_enter(&vdp->xdf_dev_lk); 3423 3424 vdp->xdf_suspending = B_TRUE; 3425 xdf_ring_destroy(vdp); 3426 xdf_set_state(vdp, XD_SUSPEND); 3427 vdp->xdf_suspending = 
B_FALSE; 3428 3429 mutex_exit(&vdp->xdf_dev_lk); 3430 mutex_exit(&vdp->xdf_cb_lk); 3431 3432 if (xdf_debug & SUSRES_DBG) 3433 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3434 3435 return (DDI_SUCCESS); 3436 } 3437 3438 static int 3439 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3440 { 3441 xdf_t *vdp; 3442 int instance; 3443 3444 switch (cmd) { 3445 3446 case DDI_PM_SUSPEND: 3447 break; 3448 3449 case DDI_SUSPEND: 3450 return (xdf_suspend(dip)); 3451 3452 case DDI_DETACH: 3453 break; 3454 3455 default: 3456 return (DDI_FAILURE); 3457 } 3458 3459 instance = ddi_get_instance(dip); 3460 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3461 vdp = ddi_get_soft_state(xdf_ssp, instance); 3462 3463 if (vdp == NULL) 3464 return (DDI_FAILURE); 3465 3466 mutex_enter(&vdp->xdf_cb_lk); 3467 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3468 if (vdp->xdf_state != XD_CLOSED) { 3469 mutex_exit(&vdp->xdf_cb_lk); 3470 return (DDI_FAILURE); 3471 } 3472 mutex_exit(&vdp->xdf_cb_lk); 3473 3474 ASSERT(!ISDMACBON(vdp)); 3475 3476 #if defined(XPV_HVM_DRIVER) 3477 xdf_hvm_rm(dip); 3478 #endif /* XPV_HVM_DRIVER */ 3479 3480 if (vdp->xdf_timeout_id != 0) 3481 (void) untimeout(vdp->xdf_timeout_id); 3482 3483 xvdi_remove_event_handler(dip, XS_OE_STATE); 3484 ddi_taskq_destroy(vdp->xdf_ready_tq); 3485 3486 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3487 cmlb_free_handle(&vdp->xdf_vd_lbl); 3488 3489 /* we'll support backend running in domU later */ 3490 #ifdef DOMU_BACKEND 3491 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3492 #endif 3493 3494 list_destroy(&vdp->xdf_vreq_act); 3495 ddi_prop_remove_all(dip); 3496 xdf_kstat_delete(dip); 3497 ddi_remove_softintr(vdp->xdf_softintr_id); 3498 xvdi_remove_xb_watch_handlers(dip); 3499 ddi_set_driver_private(dip, NULL); 3500 cv_destroy(&vdp->xdf_dev_cv); 3501 mutex_destroy(&vdp->xdf_cb_lk); 3502 mutex_destroy(&vdp->xdf_dev_lk); 3503 if (vdp->xdf_cache_flush_block != NULL) 3504 kmem_free(vdp->xdf_flush_mem, 2 * 
vdp->xdf_xdev_secsize); 3505 ddi_soft_state_free(xdf_ssp, instance); 3506 return (DDI_SUCCESS); 3507 } 3508 3509 /* 3510 * Driver linkage structures. 3511 */ 3512 static struct cb_ops xdf_cbops = { 3513 xdf_open, 3514 xdf_close, 3515 xdf_strategy, 3516 nodev, 3517 xdf_dump, 3518 xdf_read, 3519 xdf_write, 3520 xdf_ioctl, 3521 nodev, 3522 nodev, 3523 nodev, 3524 nochpoll, 3525 xdf_prop_op, 3526 NULL, 3527 D_MP | D_NEW | D_64BIT, 3528 CB_REV, 3529 xdf_aread, 3530 xdf_awrite 3531 }; 3532 3533 struct dev_ops xdf_devops = { 3534 DEVO_REV, /* devo_rev */ 3535 0, /* devo_refcnt */ 3536 xdf_getinfo, /* devo_getinfo */ 3537 nulldev, /* devo_identify */ 3538 nulldev, /* devo_probe */ 3539 xdf_attach, /* devo_attach */ 3540 xdf_detach, /* devo_detach */ 3541 nodev, /* devo_reset */ 3542 &xdf_cbops, /* devo_cb_ops */ 3543 NULL, /* devo_bus_ops */ 3544 NULL, /* devo_power */ 3545 ddi_quiesce_not_supported, /* devo_quiesce */ 3546 }; 3547 3548 /* 3549 * Module linkage structures. 3550 */ 3551 static struct modldrv modldrv = { 3552 &mod_driverops, /* Type of module. 
This one is a driver */ 3553 "virtual block driver", /* short description */ 3554 &xdf_devops /* driver specific ops */ 3555 }; 3556 3557 static struct modlinkage xdf_modlinkage = { 3558 MODREV_1, (void *)&modldrv, NULL 3559 }; 3560 3561 /* 3562 * standard module entry points 3563 */ 3564 int 3565 _init(void) 3566 { 3567 int rc; 3568 3569 xdf_major = ddi_name_to_major("xdf"); 3570 if (xdf_major == (major_t)-1) 3571 return (EINVAL); 3572 3573 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3574 return (rc); 3575 3576 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3577 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3578 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3579 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3580 3581 #if defined(XPV_HVM_DRIVER) 3582 xdf_hvm_init(); 3583 #endif /* XPV_HVM_DRIVER */ 3584 3585 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3586 #if defined(XPV_HVM_DRIVER) 3587 xdf_hvm_fini(); 3588 #endif /* XPV_HVM_DRIVER */ 3589 kmem_cache_destroy(xdf_vreq_cache); 3590 kmem_cache_destroy(xdf_gs_cache); 3591 ddi_soft_state_fini(&xdf_ssp); 3592 return (rc); 3593 } 3594 3595 return (rc); 3596 } 3597 3598 int 3599 _fini(void) 3600 { 3601 int err; 3602 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3603 return (err); 3604 3605 #if defined(XPV_HVM_DRIVER) 3606 xdf_hvm_fini(); 3607 #endif /* XPV_HVM_DRIVER */ 3608 3609 kmem_cache_destroy(xdf_vreq_cache); 3610 kmem_cache_destroy(xdf_gs_cache); 3611 ddi_soft_state_fini(&xdf_ssp); 3612 3613 return (0); 3614 } 3615 3616 int 3617 _info(struct modinfo *modinfop) 3618 { 3619 return (mod_info(&xdf_modlinkage, modinfop)); 3620 } 3621