/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *  - support alternate block size (currently only DEV_BSIZE supported)
 *  - revalidate geometry for removable devices
 *
 * This driver exports Solaris disk device nodes, accepts IO requests from
 * those nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notifications of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
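 *
 * As a rough sketch of the connection state machine (see the xdf_state_t
 * definition for the authoritative list of states), a device normally
 * moves through XD_UNKNOWN/XD_CLOSED -> XD_INIT -> XD_CONNECTED ->
 * XD_READY, and drops back to XD_UNKNOWN or XD_CLOSED when the
 * connection is torn down (see xdf_disconnect() and xdf_oe_change()
 * below).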
 */

#include <io/xdf.h>

#include <sys/conf.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
#else /* !XPV_HVM_DRIVER */
#include <sys/evtchn_impl.h>
#endif /* !XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <sys/fdio.h>
#include <sys/cdio.h>

/*
 * DEBUG_EVAL can be used to include debug only statements without
 * having to use '#ifdef DEBUG' statements
 */
#ifdef DEBUG
#define DEBUG_EVAL(x)	(x)
#else /* !DEBUG */
#define DEBUG_EVAL(x)
#endif /* !DEBUG */

#define XDF_DRAIN_MSEC_DELAY	(50*1000)	/* 00.05 sec */
#define XDF_DRAIN_RETRY_COUNT	200		/* 10.00 sec */

#define INVALID_DOMID		((domid_t)-1)
#define FLUSH_DISKCACHE		0x1
#define WRITE_BARRIER		0x2
#define DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
#define USE_WRITE_BARRIER(vdp)						\
    ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define USE_FLUSH_DISKCACHE(vdp)					\
    ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define IS_WRITE_BARRIER(vdp, bp)					\
    (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&				\
    ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define IS_FLUSH_DISKCACHE(bp)						\
    (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

#define VREQ_DONE(vreq)							\
    VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&		\
    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||			\
    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))

#define BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
#define BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))

extern int do_polled_io;

/* run-time tunables that we don't want the compiler to optimize away */
volatile int xdf_debug = 0;
volatile boolean_t xdf_barrier_flush_disable = B_FALSE;

/* per module globals */
major_t xdf_major;
static void *xdf_ssp;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
static int xdf_fbrewrites;	/* flush block re-write count */

/* misc public functions (used by xdf_shell.c) */
int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

/* misc private functions */
static void xdf_io_start(xdf_t *);

/* callbacks from common label */
static cmlb_tg_ops_t xdf_lb_ops = {
    TG_DK_OPS_VERSION_1,
    xdf_lb_rdwr,
    xdf_lb_getinfo
};

/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
    DMA_ATTR_V0,
    (uint64_t)0,			/* lowest address */
    (uint64_t)0xffffffffffffffff,	/* highest usable address */
    (uint64_t)0xffffff,			/* DMA counter limit max */
    (uint64_t)XB_BSIZE,			/* alignment in bytes */
    XB_BSIZE - 1,			/* bitmap of burst sizes */
    XB_BSIZE,				/* min transfer */
    (uint64_t)XB_MAX_XFER,		/* maximum transfer */
    (uint64_t)PAGEOFFSET,		/* 1 page segment length */
    BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
    XB_BSIZE,				/* granularity */
    0,					/* flags (reserved) */
};

static ddi_device_acc_attr_t xc_acc_attr = {
    DDI_DEVICE_ATTR_V0,
    DDI_NEVERSWAP_ACC,
    DDI_STRICTORDER_ACC
};

static void
xdf_timeout_handler(void *arg)
{
    xdf_t *vdp = arg;

    mutex_enter(&vdp->xdf_dev_lk);
    vdp->xdf_timeout_id = 0;
    mutex_exit(&vdp->xdf_dev_lk);

    /* new timeout thread could be re-scheduled */
    xdf_io_start(vdp);
}

/*
 * callback func when DMA/GTE resources are available
 *
 * Note: we only register one callback function with the grant table
 * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
 */
static int
xdf_dmacallback(caddr_t arg)
{
    xdf_t *vdp = (xdf_t *)arg;
    ASSERT(vdp != NULL);

    DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
        vdp->xdf_addr));

    ddi_trigger_softintr(vdp->xdf_softintr_id);
    return (DDI_DMA_CALLBACK_DONE);
}

static ge_slot_t *
gs_get(xdf_t *vdp, int isread)
{
    grant_ref_t gh;
    ge_slot_t *gs;

    /* try to alloc GTEs needed in this slot, first */
    if (gnttab_alloc_grant_references(
        BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
        if (vdp->xdf_gnt_callback.next == NULL) {
            SETDMACBON(vdp);
            gnttab_request_free_callback(
                &vdp->xdf_gnt_callback,
                (void (*)(void *))xdf_dmacallback,
                (void *)vdp,
                BLKIF_MAX_SEGMENTS_PER_REQUEST);
        }
        return (NULL);
    }

    gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
    if (gs == NULL) {
        gnttab_free_grant_references(gh);
        if (vdp->xdf_timeout_id == 0)
            /* restart I/O after one second */
            vdp->xdf_timeout_id =
                timeout(xdf_timeout_handler, vdp, hz);
        return (NULL);
    }

    /* init gs_slot */
    gs->gs_oeid = vdp->xdf_peer;
    gs->gs_isread = isread;
    gs->gs_ghead = gh;
    gs->gs_ngrefs = 0;

    return (gs);
}

static void
gs_free(ge_slot_t *gs)
{
    int i;

    /* release all grant table entry resources used in this slot */
    for (i = 0; i < gs->gs_ngrefs; i++)
        gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
    gnttab_free_grant_references(gs->gs_ghead);
    list_remove(&gs->gs_vreq->v_gs, gs);
    kmem_cache_free(xdf_gs_cache, gs);
}

static grant_ref_t
gs_grant(ge_slot_t *gs, mfn_t mfn)
{
    grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);

    ASSERT(gr != -1);
    ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
    gs->gs_ge[gs->gs_ngrefs++] = gr;
    gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);

    return (gr);
}

/*
 * Alloc a vreq for this bp
 * bp->av_back contains the pointer to the vreq upon return
 */
static v_req_t *
vreq_get(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = NULL;

    ASSERT(BP_VREQ(bp) == NULL);

    vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
    if (vreq == NULL) {
        if (vdp->xdf_timeout_id == 0)
            /* restart I/O after one second */
            vdp->xdf_timeout_id =
                timeout(xdf_timeout_handler, vdp, hz);
        return (NULL);
    }
    bzero(vreq, sizeof (v_req_t));
    list_create(&vreq->v_gs, sizeof (ge_slot_t),
        offsetof(ge_slot_t, gs_vreq_link));
    vreq->v_buf = bp;
    vreq->v_status = VREQ_INIT;
    vreq->v_runq = B_FALSE;
    BP_VREQ_SET(bp, vreq);
    /* init of other fields in vreq is up to the caller */

    list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);

    return (vreq);
}

static void
vreq_free(xdf_t *vdp, v_req_t *vreq)
{
    buf_t *bp = vreq->v_buf;

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(BP_VREQ(bp) == vreq);

    list_remove(&vdp->xdf_vreq_act, vreq);

    if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
        goto done;

    switch (vreq->v_status) {
    case VREQ_DMAWIN_DONE:
    case VREQ_GS_ALLOCED:
    case VREQ_DMABUF_BOUND:
        (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
        /*FALLTHRU*/
    case VREQ_DMAMEM_ALLOCED:
        if (!ALIGNED_XFER(bp)) {
            ASSERT(vreq->v_abuf != NULL);
            if (!IS_ERROR(bp) && IS_READ(bp))
                bcopy(vreq->v_abuf, bp->b_un.b_addr,
                    bp->b_bcount);
            ddi_dma_mem_free(&vreq->v_align);
        }
        /*FALLTHRU*/
    case VREQ_MEMDMAHDL_ALLOCED:
        if (!ALIGNED_XFER(bp))
            ddi_dma_free_handle(&vreq->v_memdmahdl);
        /*FALLTHRU*/
    case VREQ_DMAHDL_ALLOCED:
        ddi_dma_free_handle(&vreq->v_dmahdl);
        break;
    default:
        break;
    }
done:
    ASSERT(!vreq->v_runq);
    list_destroy(&vreq->v_gs);
    kmem_cache_free(xdf_vreq_cache, vreq);
}

/*
 * Snarf new data if our flush block was re-written
 */
static void
check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
{
    int nblks;
    boolean_t mapin;

    if (IS_WRITE_BARRIER(vdp, bp))
        return; /* write was a flush write */

    mapin = B_FALSE;
    nblks = bp->b_bcount >> DEV_BSHIFT;
    if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
        xdf_fbrewrites++;
        if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
            mapin = B_TRUE;
            bp_mapin(bp);
        }
        bcopy(bp->b_un.b_addr +
            ((xdf_flush_block - blkno) << DEV_BSHIFT),
            vdp->xdf_cache_flush_block, DEV_BSIZE);
        if (mapin)
            bp_mapout(bp);
    }
}

/*
 * Initialize the DMA and grant table resources for the buf
 */
static int
vreq_setup(xdf_t *vdp, v_req_t *vreq)
{
    int rc;
    ddi_dma_attr_t dmaattr;
    uint_t ndcs, ndws;
    ddi_dma_handle_t dh;
    ddi_dma_handle_t mdh;
    ddi_dma_cookie_t dc;
    ddi_acc_handle_t abh;
    caddr_t aba;
    ge_slot_t *gs;
    size_t bufsz;
    off_t off;
    size_t sz;
    buf_t *bp = vreq->v_buf;
    int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
        DDI_DMA_STREAMING | DDI_DMA_PARTIAL;

    switch (vreq->v_status) {
    case VREQ_INIT:
        if (IS_FLUSH_DISKCACHE(bp)) {
            if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
                DPRINTF(DMA_DBG, ("xdf@%s: "
                    "get ge_slot failed\n", vdp->xdf_addr));
                return (DDI_FAILURE);
            }
            vreq->v_blkno = 0;
            vreq->v_nslots = 1;
            vreq->v_flush_diskcache = FLUSH_DISKCACHE;
            vreq->v_status = VREQ_GS_ALLOCED;
            gs->gs_vreq = vreq;
            list_insert_head(&vreq->v_gs, gs);
            return (DDI_SUCCESS);
        }

        if (IS_WRITE_BARRIER(vdp, bp))
            vreq->v_flush_diskcache = WRITE_BARRIER;
        vreq->v_blkno = bp->b_blkno +
            (diskaddr_t)(uintptr_t)bp->b_private;
        /* See if we wrote new data to our flush block */
        if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
            check_fbwrite(vdp, bp, vreq->v_blkno);
        vreq->v_status = VREQ_INIT_DONE;
        /*FALLTHRU*/

    case VREQ_INIT_DONE:
        /*
         * alloc DMA handle
         */
        rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
            xdf_dmacallback, (caddr_t)vdp, &dh);
        if (rc != DDI_SUCCESS) {
            SETDMACBON(vdp);
            DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
                vdp->xdf_addr));
            return (DDI_FAILURE);
        }

        vreq->v_dmahdl = dh;
        vreq->v_status = VREQ_DMAHDL_ALLOCED;
        /*FALLTHRU*/

    case VREQ_DMAHDL_ALLOCED:
        /*
         * alloc dma handle for 512-byte aligned buf
         */
        if (!ALIGNED_XFER(bp)) {
            /*
             * XXPV: we need to temporarily enlarge the seg
             * boundary and s/g length to work round CR6381968
             */
            dmaattr = xb_dma_attr;
            dmaattr.dma_attr_seg = (uint64_t)-1;
            dmaattr.dma_attr_sgllen = INT_MAX;
            rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
                xdf_dmacallback, (caddr_t)vdp, &mdh);
            if (rc != DDI_SUCCESS) {
                SETDMACBON(vdp);
                DPRINTF(DMA_DBG, ("xdf@%s: "
                    "unaligned buf DMA handle alloc failed\n",
                    vdp->xdf_addr));
                return (DDI_FAILURE);
            }
            vreq->v_memdmahdl = mdh;
            vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
        }
        /*FALLTHRU*/

    case VREQ_MEMDMAHDL_ALLOCED:
        /*
         * alloc 512-byte aligned buf
         */
        if (!ALIGNED_XFER(bp)) {
            if (bp->b_flags & (B_PAGEIO | B_PHYS))
                bp_mapin(bp);

            rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
                roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
                DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
                &aba, &bufsz, &abh);
            if (rc != DDI_SUCCESS) {
                SETDMACBON(vdp);
                DPRINTF(DMA_DBG, ("xdf@%s: "
                    "DMA mem allocation failed\n",
                    vdp->xdf_addr));
                return (DDI_FAILURE);
            }

            vreq->v_abuf = aba;
            vreq->v_align = abh;
            vreq->v_status = VREQ_DMAMEM_ALLOCED;

            ASSERT(bufsz >= bp->b_bcount);
            if (!IS_READ(bp))
                bcopy(bp->b_un.b_addr, vreq->v_abuf,
                    bp->b_bcount);
        }
        /*FALLTHRU*/

    case VREQ_DMAMEM_ALLOCED:
        /*
         * dma bind
         */
        if (ALIGNED_XFER(bp)) {
            rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
                dma_flags, xdf_dmacallback, (caddr_t)vdp,
                &dc, &ndcs);
        } else {
            rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
                NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
                xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
        }
        if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
            /* get num of dma windows */
            if (rc == DDI_DMA_PARTIAL_MAP) {
                rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
                ASSERT(rc == DDI_SUCCESS);
            } else {
                ndws = 1;
            }
        } else {
            SETDMACBON(vdp);
            DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
                vdp->xdf_addr));
            return (DDI_FAILURE);
        }

        vreq->v_dmac = dc;
        vreq->v_dmaw = 0;
        vreq->v_ndmacs = ndcs;
        vreq->v_ndmaws = ndws;
        vreq->v_nslots = ndws;
        vreq->v_status = VREQ_DMABUF_BOUND;
        /*FALLTHRU*/

    case VREQ_DMABUF_BOUND:
        /*
         * get ge_slot, callback is set upon failure from gs_get(),
         * if not set previously
         */
        if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
            DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
                vdp->xdf_addr));
            return (DDI_FAILURE);
        }

        vreq->v_status = VREQ_GS_ALLOCED;
        gs->gs_vreq = vreq;
        list_insert_head(&vreq->v_gs, gs);
        break;

    case VREQ_GS_ALLOCED:
        /* nothing needs to be done */
        break;

    case VREQ_DMAWIN_DONE:
        /*
         * move to the next dma window
         */
        ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);

        /* get a ge_slot for this DMA window */
        if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
            DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
                vdp->xdf_addr));
            return (DDI_FAILURE);
        }

        vreq->v_dmaw++;
        VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
            &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
        vreq->v_status = VREQ_GS_ALLOCED;
        gs->gs_vreq = vreq;
        list_insert_head(&vreq->v_gs, gs);
        break;

    default:
        return (DDI_FAILURE);
    }

    return (DDI_SUCCESS);
}

static int
xdf_cmlb_attach(xdf_t *vdp)
{
    dev_info_t *dip = vdp->xdf_dip;

    return (cmlb_attach(dip, &xdf_lb_ops,
        XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
        XD_IS_RM(vdp),
        B_TRUE,
        XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
#if defined(XPV_HVM_DRIVER)
        (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) |
        CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
        XD_IS_CD(vdp) ? 0 : CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
        vdp->xdf_vd_lbl, NULL));
}

static void
xdf_io_err(buf_t *bp, int err, size_t resid)
{
    bioerror(bp, err);
    if (resid == 0)
        bp->b_resid = bp->b_bcount;
    biodone(bp);
}

static void
xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = BP_VREQ(bp);

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

    if (vdp->xdf_xdev_iostat == NULL)
        return;
    if ((vreq != NULL) && vreq->v_runq) {
        kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    } else {
        kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    }
}

static void
xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = BP_VREQ(bp);

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

    if (vdp->xdf_xdev_iostat == NULL)
        return;
    if ((vreq != NULL) && vreq->v_runq) {
        kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    } else {
        kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    }
}

static void
xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = BP_VREQ(bp);

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(!vreq->v_runq);

    vreq->v_runq = B_TRUE;
    if (vdp->xdf_xdev_iostat == NULL)
        return;
    kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}

static void
xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
{
    v_req_t *vreq = BP_VREQ(bp);

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(vreq->v_runq);

    vreq->v_runq = B_FALSE;
    if (vdp->xdf_xdev_iostat == NULL)
        return;
    kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}

int
xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    kstat_t *kstat;
    buf_t *bp;

    if ((kstat = kstat_create(
        ks_module, instance, NULL, "disk",
        KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
        return (-1);

    /* See comment about locking in xdf_kstat_delete(). */
    mutex_enter(&vdp->xdf_iostat_lk);
    mutex_enter(&vdp->xdf_dev_lk);

    /* only one kstat can exist at a time */
    if (vdp->xdf_xdev_iostat != NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        kstat_delete(kstat);
        return (-1);
    }

    vdp->xdf_xdev_iostat = kstat;
    vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
    kstat_install(vdp->xdf_xdev_iostat);

    /*
     * Now that we've created a kstat, we need to update the waitq and
     * runq counts for the kstat to reflect our current state.
     *
     * For a buf_t structure to be on the runq, it must have a ring
     * buffer slot associated with it.  To get a ring buffer slot the
     * buf must first have a v_req_t and a ge_slot_t associated with it.
     * Then when it is granted a ring buffer slot, v_runq will be set to
     * true.
     *
     * For a buf_t structure to be on the waitq, it must not be on the
     * runq.  So to find all the buf_t's that should be on waitq, we
     * walk the active buf list and add any buf_t's which aren't on the
     * runq to the waitq.
     */
    bp = vdp->xdf_f_act;
    while (bp != NULL) {
        xdf_kstat_enter(vdp, bp);
        bp = bp->av_forw;
    }
    if (vdp->xdf_ready_tq_bp != NULL)
        xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);

    mutex_exit(&vdp->xdf_dev_lk);
    mutex_exit(&vdp->xdf_iostat_lk);
    return (0);
}

void
xdf_kstat_delete(dev_info_t *dip)
{
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
    kstat_t *kstat;
    buf_t *bp;

    /*
     * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
     * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
     * and the contents of our kstat.  xdf_iostat_lk is used
     * to protect the allocation and freeing of the actual kstat.
     * xdf_dev_lk can't be used for this purpose because kstat
     * readers use it to access the contents of the kstat and
     * hence it can't be held when calling kstat_delete().
     */
    mutex_enter(&vdp->xdf_iostat_lk);
    mutex_enter(&vdp->xdf_dev_lk);

    if (vdp->xdf_xdev_iostat == NULL) {
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_iostat_lk);
        return;
    }

    /*
     * We're about to destroy the kstat structures, so it isn't really
     * necessary to update the runq and waitq counts.  But, since this
     * isn't a hot code path we can afford to be a little pedantic and
     * go ahead and decrement the runq and waitq kstat counters to zero
     * before free'ing them.  This helps us ensure that we've gotten all
     * our accounting correct.
     *
     * For an explanation of how we determine which buffers go on the
     * runq vs which go on the waitq, see the comments in
     * xdf_kstat_create().
     */
    bp = vdp->xdf_f_act;
    while (bp != NULL) {
        xdf_kstat_exit(vdp, bp);
        bp = bp->av_forw;
    }
    if (vdp->xdf_ready_tq_bp != NULL)
        xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);

    kstat = vdp->xdf_xdev_iostat;
    vdp->xdf_xdev_iostat = NULL;
    mutex_exit(&vdp->xdf_dev_lk);
    kstat_delete(kstat);
    mutex_exit(&vdp->xdf_iostat_lk);
}

/*
 * Add an IO request onto the active queue.
 *
 * We have to detect IOs generated by xdf_ready_tq_thread.  These IOs
 * are used to establish a connection to the backend, so they receive
 * priority over all other IOs.  Since xdf_ready_tq_thread only does
 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
 * given time and we record the buf associated with that request in
 * xdf_ready_tq_bp.
 */
static void
xdf_bp_push(xdf_t *vdp, buf_t *bp)
{
    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(bp->av_forw == NULL);

    xdf_kstat_enter(vdp, bp);

    if (curthread == vdp->xdf_ready_tq_thread) {
        /* new IO requests from the ready thread */
        ASSERT(vdp->xdf_ready_tq_bp == NULL);
        vdp->xdf_ready_tq_bp = bp;
        return;
    }

    /* this is a normal IO request */
    ASSERT(bp != vdp->xdf_ready_tq_bp);

    if (vdp->xdf_f_act == NULL) {
        /* this is the only IO on the active queue */
        ASSERT(vdp->xdf_l_act == NULL);
        ASSERT(vdp->xdf_i_act == NULL);
        vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
        return;
    }

    /* add this IO to the tail of the active queue */
    vdp->xdf_l_act->av_forw = bp;
    vdp->xdf_l_act = bp;
    if (vdp->xdf_i_act == NULL)
        vdp->xdf_i_act = bp;
}

static void
xdf_bp_pop(xdf_t *vdp, buf_t *bp)
{
    buf_t *bp_iter;

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(VREQ_DONE(BP_VREQ(bp)));

    if (vdp->xdf_ready_tq_bp == bp) {
        /* we're done with a ready thread IO request */
        ASSERT(bp->av_forw == NULL);
        vdp->xdf_ready_tq_bp = NULL;
        return;
    }

    /* we're done with a normal IO request */
    ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
    ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
    ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
    ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);

    if (bp == vdp->xdf_f_act) {
        /* This IO was at the head of our active queue. */
        vdp->xdf_f_act = bp->av_forw;
        if (bp == vdp->xdf_l_act)
            vdp->xdf_l_act = NULL;
    } else {
        /* This IO finished before some other pending IOs. */
        bp_iter = vdp->xdf_f_act;
        while (bp != bp_iter->av_forw) {
            bp_iter = bp_iter->av_forw;
            ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
            ASSERT(bp_iter != vdp->xdf_i_act);
        }
        bp_iter->av_forw = bp->av_forw;
        if (bp == vdp->xdf_l_act)
            vdp->xdf_l_act = bp_iter;
    }
    bp->av_forw = NULL;
}

static buf_t *
xdf_bp_next(xdf_t *vdp)
{
    v_req_t *vreq;
    buf_t *bp;

    if (vdp->xdf_state == XD_CONNECTED) {
        /*
         * If we're in the XD_CONNECTED state, we only service IOs
         * from the xdf_ready_tq_thread thread.
         */
        if ((bp = vdp->xdf_ready_tq_bp) == NULL)
            return (NULL);
        if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
            return (bp);
        return (NULL);
    }

    /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
    if (vdp->xdf_state != XD_READY)
        return (NULL);

    ASSERT(vdp->xdf_ready_tq_bp == NULL);
    for (;;) {
        if ((bp = vdp->xdf_i_act) == NULL)
            return (NULL);
        if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
            return (bp);

        /* advance the active buf index pointer */
        vdp->xdf_i_act = bp->av_forw;
    }
}

static void
xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
{
    ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
    v_req_t *vreq = gs->gs_vreq;
    buf_t *bp = vreq->v_buf;

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(BP_VREQ(bp) == vreq);

    gs_free(gs);

    if (bioerr != 0)
        bioerror(bp, bioerr);
    ASSERT(vreq->v_nslots > 0);
    if (--vreq->v_nslots > 0)
        return;

    /* remove this IO from our active queue */
    xdf_bp_pop(vdp, bp);

    ASSERT(vreq->v_runq);
    xdf_kstat_exit(vdp, bp);
    vreq->v_runq = B_FALSE;
    vreq_free(vdp, vreq);

    if (IS_ERROR(bp)) {
        xdf_io_err(bp, geterror(bp), 0);
    } else if (bp->b_resid != 0) {
        /* Partial transfers are an error */
        xdf_io_err(bp, EIO, bp->b_resid);
    } else {
        biodone(bp);
    }
}

/*
 * xdf interrupt handler
 */
static uint_t
xdf_intr_locked(xdf_t *vdp)
{
    xendev_ring_t *xbr;
    blkif_response_t *resp;
    int bioerr;
    uint64_t id;
    uint8_t op;
    uint16_t status;
    ddi_acc_handle_t acchdl;

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

    if ((xbr = vdp->xdf_xb_ring) == NULL)
        return (DDI_INTR_UNCLAIMED);

    acchdl = vdp->xdf_xb_ring_hdl;

    /*
     * complete all requests which have a response
     */
    while (resp = xvdi_ring_get_response(xbr)) {
        id = ddi_get64(acchdl, &resp->id);
        op = ddi_get8(acchdl, &resp->operation);
        status = ddi_get16(acchdl, (uint16_t *)&resp->status);
        DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
            op, id, status));

        if (status != BLKIF_RSP_OKAY) {
            DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
                vdp->xdf_addr,
                (op == BLKIF_OP_READ) ? "reading" : "writing"));
"reading" : "writing")); 974 bioerr = EIO; 975 } else { 976 bioerr = 0; 977 } 978 979 xdf_io_fini(vdp, id, bioerr); 980 } 981 return (DDI_INTR_CLAIMED); 982 } 983 984 static uint_t 985 xdf_intr(caddr_t arg) 986 { 987 xdf_t *vdp = (xdf_t *)arg; 988 int rv; 989 990 mutex_enter(&vdp->xdf_dev_lk); 991 rv = xdf_intr_locked(vdp); 992 mutex_exit(&vdp->xdf_dev_lk); 993 994 if (!do_polled_io) 995 xdf_io_start(vdp); 996 997 return (rv); 998 } 999 1000 static void 1001 xdf_ring_push(xdf_t *vdp) 1002 { 1003 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1004 1005 if (vdp->xdf_xb_ring == NULL) 1006 return; 1007 1008 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1009 DPRINTF(IO_DBG, ( 1010 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1011 vdp->xdf_addr)); 1012 } 1013 1014 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1015 xvdi_notify_oe(vdp->xdf_dip); 1016 } 1017 1018 static int 1019 xdf_ring_drain_locked(xdf_t *vdp) 1020 { 1021 int pollc, rv = 0; 1022 1023 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1024 1025 if (xdf_debug & SUSRES_DBG) 1026 xen_printf("xdf_ring_drain: start\n"); 1027 1028 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1029 if (vdp->xdf_xb_ring == NULL) 1030 goto out; 1031 1032 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1033 (void) xdf_intr_locked(vdp); 1034 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1035 goto out; 1036 xdf_ring_push(vdp); 1037 1038 /* file-backed devices can be slow */ 1039 mutex_exit(&vdp->xdf_dev_lk); 1040 #ifdef XPV_HVM_DRIVER 1041 (void) HYPERVISOR_yield(); 1042 #endif /* XPV_HVM_DRIVER */ 1043 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1044 mutex_enter(&vdp->xdf_dev_lk); 1045 } 1046 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1047 1048 out: 1049 if (vdp->xdf_xb_ring != NULL) { 1050 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1051 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1052 rv = EIO; 1053 } 1054 if (xdf_debug & SUSRES_DBG) 1055 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1056 vdp->xdf_addr, rv); 1057 return (rv); 1058 } 1059 1060 static int 1061 xdf_ring_drain(xdf_t *vdp) 1062 { 1063 int rv; 1064 mutex_enter(&vdp->xdf_dev_lk); 1065 rv = xdf_ring_drain_locked(vdp); 1066 mutex_exit(&vdp->xdf_dev_lk); 1067 return (rv); 1068 } 1069 1070 /* 1071 * Destroy all v_req_t, grant table entries, and our ring buffer. 1072 */ 1073 static void 1074 xdf_ring_destroy(xdf_t *vdp) 1075 { 1076 v_req_t *vreq; 1077 buf_t *bp; 1078 ge_slot_t *gs; 1079 1080 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1081 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1082 1083 if ((vdp->xdf_state != XD_INIT) && 1084 (vdp->xdf_state != XD_CONNECTED) && 1085 (vdp->xdf_state != XD_READY)) { 1086 ASSERT(vdp->xdf_xb_ring == NULL); 1087 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1088 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1089 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1090 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1091 return; 1092 } 1093 1094 /* 1095 * We don't want to recieve async notifications from the backend 1096 * when it finishes processing ring entries. 1097 */ 1098 #ifdef XPV_HVM_DRIVER 1099 ec_unbind_evtchn(vdp->xdf_evtchn); 1100 #else /* !XPV_HVM_DRIVER */ 1101 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1102 #endif /* !XPV_HVM_DRIVER */ 1103 1104 /* 1105 * Drain any requests in the ring. We need to do this before we 1106 * can free grant table entries, because if active ring entries 1107 * point to grants, then the backend could be trying to access 1108 * those grants. 
     */
    (void) xdf_ring_drain_locked(vdp);

    /* We're done talking to the backend so free up our event channel */
    xvdi_free_evtchn(vdp->xdf_dip);
    vdp->xdf_evtchn = INVALID_EVTCHN;

    while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
        bp = vreq->v_buf;
        ASSERT(BP_VREQ(bp) == vreq);

        /* Free up any grant table entries associated with this IO */
        while ((gs = list_head(&vreq->v_gs)) != NULL)
            gs_free(gs);

        /* If this IO was on the runq, move it back to the waitq. */
        if (vreq->v_runq)
            xdf_kstat_runq_to_waitq(vdp, bp);

        /*
         * Reset any buf IO state since we're going to re-issue the
         * IO when we reconnect.
         */
        vreq_free(vdp, vreq);
        BP_VREQ_SET(bp, NULL);
        bioerror(bp, 0);
    }

    /* reset the active queue index pointer */
    vdp->xdf_i_act = vdp->xdf_f_act;

    /* Destroy the ring */
    xvdi_free_ring(vdp->xdf_xb_ring);
    vdp->xdf_xb_ring = NULL;
    vdp->xdf_xb_ring_hdl = NULL;
    vdp->xdf_peer = INVALID_DOMID;
}

void
xdfmin(struct buf *bp)
{
    if (bp->b_bcount > xdf_maxphys)
        bp->b_bcount = xdf_maxphys;
}

/*
 * Check if we have a pending "eject" media request.
 */
static int
xdf_eject_pending(xdf_t *vdp)
{
    dev_info_t *dip = vdp->xdf_dip;
    char *xsname, *str;

    if (!vdp->xdf_media_req_supported)
        return (B_FALSE);

    if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
        (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
        return (B_FALSE);

    if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
        strfree(str);
        return (B_FALSE);
    }
    strfree(str);
    return (B_TRUE);
}

/*
 * Generate a media request.
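 *
 * The request is published by writing the supplied string to the xenbus
 * XBP_MEDIA_REQ property for the backend to act on; the values used by
 * this driver are XBV_MEDIA_REQ_NONE, XBV_MEDIA_REQ_LOCK and
 * XBV_MEDIA_REQ_EJECT (see the xenbus property definitions for the
 * full set).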
 */
static int
xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
{
    dev_info_t *dip = vdp->xdf_dip;
    char *xsname;

    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));

    if ((xsname = xvdi_get_xsname(dip)) == NULL)
        return (ENXIO);

    /* Check if we support media requests */
    if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
        return (ENOTTY);

    /* If an eject is pending then don't allow any new requests */
    if (xdf_eject_pending(vdp))
        return (ENXIO);

    /* Make sure that there is media present */
    if (media_required && (vdp->xdf_xdev_nblocks == 0))
        return (ENXIO);

    /* We only allow operations when the device is ready and connected */
    if (vdp->xdf_state != XD_READY)
        return (EIO);

    if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
        return (EIO);

    return (0);
}

/*
 * populate a single blkif_request_t w/ a buf
 */
static void
xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
    grant_ref_t gr;
    uint8_t fsect, lsect;
    size_t bcnt;
    paddr_t dma_addr;
    off_t blk_off;
    dev_info_t *dip = vdp->xdf_dip;
    blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
    v_req_t *vreq = BP_VREQ(bp);
    uint64_t blkno = vreq->v_blkno;
    uint_t ndmacs = vreq->v_ndmacs;
    ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
    int seg = 0;
    int isread = IS_READ(bp);
    ge_slot_t *gs = list_head(&vreq->v_gs);

    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    ASSERT(vreq->v_status == VREQ_GS_ALLOCED);

    if (isread)
        ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
    else {
        switch (vreq->v_flush_diskcache) {
        case FLUSH_DISKCACHE:
            ddi_put8(acchdl, &rreq->operation,
                BLKIF_OP_FLUSH_DISKCACHE);
            ddi_put16(acchdl, &rreq->handle, vdev);
            ddi_put64(acchdl, &rreq->id,
                (uint64_t)(uintptr_t)(gs));
            ddi_put8(acchdl, &rreq->nr_segments, 0);
            vreq->v_status = VREQ_DMAWIN_DONE;
            return;
        case WRITE_BARRIER:
            ddi_put8(acchdl, &rreq->operation,
                BLKIF_OP_WRITE_BARRIER);
            break;
        default:
            if (!vdp->xdf_wce)
                ddi_put8(acchdl, &rreq->operation,
                    BLKIF_OP_WRITE_BARRIER);
            else
                ddi_put8(acchdl, &rreq->operation,
                    BLKIF_OP_WRITE);
            break;
        }
    }

    ddi_put16(acchdl, &rreq->handle, vdev);
    ddi_put64(acchdl, &rreq->sector_number, blkno);
    ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));

    /*
     * loop until all segments are populated or there are no more
     * dma cookies in the buf
     */
    for (;;) {
        /*
         * Each segment of a blkif request can transfer up to
         * one 4K page of data.
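         * The first_sect/last_sect values computed below identify
         * which sectors within that page are valid (assuming
         * XB_BSIZE is the 512-byte sector size, a full page spans
         * sectors 0 through 7).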
         */
        bcnt = vreq->v_dmac.dmac_size;
        dma_addr = vreq->v_dmac.dmac_laddress;
        blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
        fsect = blk_off >> XB_BSHIFT;
        lsect = fsect + (bcnt >> XB_BSHIFT) - 1;

        ASSERT(bcnt <= PAGESIZE);
        ASSERT((bcnt % XB_BSIZE) == 0);
        ASSERT((blk_off & XB_BMASK) == 0);
        ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
            lsect < XB_MAX_SEGLEN / XB_BSIZE);

        gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
        ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
        ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
        ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);

        DPRINTF(IO_DBG, (
            "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
            vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
        DPRINTF(IO_DBG, (
            "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
            vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));

        blkno += (bcnt >> XB_BSHIFT);
        seg++;
        ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
        if (--ndmacs) {
            ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
            continue;
        }

        vreq->v_status = VREQ_DMAWIN_DONE;
        vreq->v_blkno = blkno;
        break;
    }
    ddi_put8(acchdl, &rreq->nr_segments, seg);
    DPRINTF(IO_DBG, (
        "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
        vdp->xdf_addr, rreq->id));
}

static void
xdf_io_start(xdf_t *vdp)
{
    struct buf *bp;
    v_req_t *vreq;
    blkif_request_t *rreq;
    boolean_t rreqready = B_FALSE;

    mutex_enter(&vdp->xdf_dev_lk);

    /*
     * Populate the ring request(s).  Loop until there is no buf to
     * transfer or no free slot available in the I/O ring.
     */
    for (;;) {
        /* don't start any new IO if we're suspending */
        if (vdp->xdf_suspending)
            break;
        if ((bp = xdf_bp_next(vdp)) == NULL)
            break;

        /* if the buf doesn't already have a vreq, allocate one */
        if (((vreq = BP_VREQ(bp)) == NULL) &&
            ((vreq = vreq_get(vdp, bp)) == NULL))
            break;

        /* alloc DMA/GTE resources */
        if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
            break;

        /* get next blkif_request in the ring */
        if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
            break;
        bzero(rreq, sizeof (blkif_request_t));
        rreqready = B_TRUE;

        /* populate blkif_request with this buf */
        xdf_process_rreq(vdp, bp, rreq);

        /*
         * This buffer/vreq pair has now been allocated ring buffer
         * resources, so if it isn't already in our runq, add it.
         */
        if (!vreq->v_runq)
            xdf_kstat_waitq_to_runq(vdp, bp);
    }

    /* Send the request(s) to the backend */
    if (rreqready)
        xdf_ring_push(vdp);

    mutex_exit(&vdp->xdf_dev_lk);
}


/* check if partition is open, -1 - check all partitions on the disk */
static boolean_t
xdf_isopen(xdf_t *vdp, int partition)
{
    int i;
    ulong_t parbit;
    boolean_t rval = B_FALSE;

    ASSERT((partition == -1) ||
        ((partition >= 0) && (partition < XDF_PEXT)));

    if (partition == -1)
        parbit = (ulong_t)-1;
    else
        parbit = 1 << partition;

    for (i = 0; i < OTYPCNT; i++) {
        if (vdp->xdf_vd_open[i] & parbit)
            rval = B_TRUE;
    }

    return (rval);
}

/*
 * The connection should never be closed as long as someone is holding
 * us open, there is pending IO, or someone is waiting for a connection.
 */
static boolean_t
xdf_busy(xdf_t *vdp)
{
    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

    if ((vdp->xdf_xb_ring != NULL) &&
        xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
        ASSERT(vdp->xdf_state != XD_CLOSED);
        return (B_TRUE);
    }

    if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
        ASSERT(vdp->xdf_state != XD_CLOSED);
        return (B_TRUE);
    }

    if (xdf_isopen(vdp, -1)) {
        ASSERT(vdp->xdf_state != XD_CLOSED);
        return (B_TRUE);
    }

    if (vdp->xdf_connect_req > 0) {
        ASSERT(vdp->xdf_state != XD_CLOSED);
        return (B_TRUE);
    }

    return (B_FALSE);
}

static void
xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
{
    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
        vdp->xdf_addr, vdp->xdf_state, new_state));
    vdp->xdf_state = new_state;
    cv_broadcast(&vdp->xdf_dev_cv);
}

static void
xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
{
    dev_info_t *dip = vdp->xdf_dip;
    boolean_t busy;

    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
    ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
    ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));

    /* Check if we're already there. */
    if (vdp->xdf_state == new_state)
        return;

    mutex_enter(&vdp->xdf_dev_lk);
    busy = xdf_busy(vdp);

    /* If we're already closed then there's nothing to do. */
    if (vdp->xdf_state == XD_CLOSED) {
        ASSERT(!busy);
        xdf_set_state(vdp, new_state);
        mutex_exit(&vdp->xdf_dev_lk);
        return;
    }

#ifdef DEBUG
    /* UhOh.  Warn the user that something bad has happened. */
    if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
        (vdp->xdf_xdev_nblocks != 0)) {
        cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
            vdp->xdf_addr);
    }
#endif /* DEBUG */

    xdf_ring_destroy(vdp);

    /* If we're busy then we can only go into the unknown state */
    xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
    mutex_exit(&vdp->xdf_dev_lk);

    /* if we're closed now, let the other end know */
    if (vdp->xdf_state == XD_CLOSED)
        (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
}


/*
 * Kick-off connect process
 * Status should be XD_UNKNOWN or XD_CLOSED
 * On success, status will be changed to XD_INIT
 * On error, it will be changed to XD_UNKNOWN
 */
static int
xdf_setstate_init(xdf_t *vdp)
{
    dev_info_t *dip = vdp->xdf_dip;
    xenbus_transaction_t xbt;
    grant_ref_t gref;
    char *xsname, *str;
    int rv;

    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
    ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
    ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
        (vdp->xdf_state == XD_CLOSED));

    DPRINTF(DDI_DBG,
        ("xdf@%s: starting connection process\n", vdp->xdf_addr));

    /*
     * If an eject is pending then don't allow a new connection, but
     * we want to return without displaying an error message.
     */
    if (xdf_eject_pending(vdp)) {
        xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
        return (DDI_FAILURE);
    }

    if ((xsname = xvdi_get_xsname(dip)) == NULL)
        goto errout;

    if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
        goto errout;

    (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);

    /*
     * Sanity check for the existence of the xenbus device-type property.
     * This property might not exist if our xenbus device nodes were
     * force destroyed while we were still connected to the backend.
     */
    if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
        goto errout;
    strfree(str);

    if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
        goto errout;

    vdp->xdf_evtchn = xvdi_get_evtchn(dip);
#ifdef XPV_HVM_DRIVER
    ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
#else /* !XPV_HVM_DRIVER */
    if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
        DDI_SUCCESS) {
        cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
            "failed to add intr handler", vdp->xdf_addr);
        goto errout1;
    }
#endif /* !XPV_HVM_DRIVER */

    if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
        sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
        DDI_SUCCESS) {
        cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
            vdp->xdf_addr);
        goto errout2;
    }
    vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */

    /*
     * Write into xenstore the info needed by backend
     */
trans_retry:
    if (xenbus_transaction_start(&xbt)) {
        cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
            vdp->xdf_addr);
        xvdi_fatal_error(dip, EIO, "connect transaction init");
        goto fail_trans;
    }

    /*
     * XBP_PROTOCOL is written by the domain builder in the case of PV
     * domains.  However, it is not written for HVM domains, so let's
     * write it here.
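     *
     * The transaction below also publishes the grant reference of the
     * I/O ring (XBP_RING_REF), our event channel (XBP_EVENT_CHAN), and
     * an initial media-req value of XBV_MEDIA_REQ_NONE, and then moves
     * the frontend to the XenbusStateInitialised state.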
     */
    if (((rv = xenbus_printf(xbt, xsname,
        XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
        ((rv = xenbus_printf(xbt, xsname,
        XBP_RING_REF, "%u", gref)) != 0) ||
        ((rv = xenbus_printf(xbt, xsname,
        XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
        ((rv = xenbus_printf(xbt, xsname,
        XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
        ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
        (void) xenbus_transaction_end(xbt, 1);
        xvdi_fatal_error(dip, rv, "connect transaction setup");
        goto fail_trans;
    }

    /* kick-off connect process */
    if (rv = xenbus_transaction_end(xbt, 0)) {
        if (rv == EAGAIN)
            goto trans_retry;
        xvdi_fatal_error(dip, rv, "connect transaction commit");
        goto fail_trans;
    }

    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
    mutex_enter(&vdp->xdf_dev_lk);
    xdf_set_state(vdp, XD_INIT);
    mutex_exit(&vdp->xdf_dev_lk);

    return (DDI_SUCCESS);

fail_trans:
    xvdi_free_ring(vdp->xdf_xb_ring);
errout2:
#ifdef XPV_HVM_DRIVER
    ec_unbind_evtchn(vdp->xdf_evtchn);
#else /* !XPV_HVM_DRIVER */
    (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
#endif /* !XPV_HVM_DRIVER */
errout1:
    xvdi_free_evtchn(dip);
    vdp->xdf_evtchn = INVALID_EVTCHN;
errout:
    xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
    cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
        vdp->xdf_addr);
    return (DDI_FAILURE);
}

int
xdf_get_flush_block(xdf_t *vdp)
{
    /*
     * Get a DEV_BSIZE aligned buffer
     */
    vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP);
    vdp->xdf_cache_flush_block =
        (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE);
    if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
        xdf_flush_block, DEV_BSIZE, NULL) != 0)
        return (DDI_FAILURE);
    return (DDI_SUCCESS);
}

static void
xdf_setstate_ready(void *arg)
{
    xdf_t *vdp = (xdf_t *)arg;

    vdp->xdf_ready_tq_thread = curthread;

    /*
     * We've created all the minor nodes via cmlb_attach() using default
     * values in xdf_attach() to make it possible to block in xdf_open(),
     * in case there's anyone (say, the booting thread) ever trying to open
     * it before we're connected to the backend.  We will refresh all those
     * minor nodes w/ the latest info we've got now when we are almost
     * connected.
     */
    mutex_enter(&vdp->xdf_dev_lk);
    if (vdp->xdf_cmbl_reattach) {
        vdp->xdf_cmbl_reattach = B_FALSE;

        mutex_exit(&vdp->xdf_dev_lk);
        if (xdf_cmlb_attach(vdp) != 0) {
            xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
            return;
        }
        mutex_enter(&vdp->xdf_dev_lk);
    }

    /* If we're not still trying to get to the ready state, then bail. */
    if (vdp->xdf_state != XD_CONNECTED) {
        mutex_exit(&vdp->xdf_dev_lk);
        return;
    }
    mutex_exit(&vdp->xdf_dev_lk);

    /*
     * If backend has feature-barrier, see if it supports disk
     * cache flush op.
     */
    vdp->xdf_flush_supported = B_FALSE;
    if (vdp->xdf_feature_barrier) {
        /*
         * Pretend we already know flush is supported so probe
         * will attempt the correct op.
         */
        vdp->xdf_flush_supported = B_TRUE;
        if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
            vdp->xdf_flush_supported = B_TRUE;
        } else {
            vdp->xdf_flush_supported = B_FALSE;
            /*
             * If the other end does not support the cache flush op
             * then we must use a barrier-write to force disk
             * cache flushing.  Barrier writes require that a data
             * block actually be written.
             * Cache a block to barrier-write when we are
             * asked to perform a flush.
             * XXX - would it be better to just copy 1 block
             * (512 bytes) from whatever write we did last
             * and rewrite that block?
             */
            if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
                xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
                return;
            }
        }
    }

    mutex_enter(&vdp->xdf_cb_lk);
    mutex_enter(&vdp->xdf_dev_lk);
    if (vdp->xdf_state == XD_CONNECTED)
        xdf_set_state(vdp, XD_READY);
    mutex_exit(&vdp->xdf_dev_lk);

    /* Restart any currently queued up io */
    xdf_io_start(vdp);

    mutex_exit(&vdp->xdf_cb_lk);
}

/*
 * synthetic geometry
 */
#define XDF_NSECTS	256
#define XDF_NHEADS	16

static void
xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
    xdf_t *vdp;
    uint_t ncyl;

    vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));

    ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);

    bzero(geomp, sizeof (*geomp));
    geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
    geomp->g_acyl = 0;
    geomp->g_nhead = XDF_NHEADS;
    geomp->g_nsect = XDF_NSECTS;
    geomp->g_secsize = XB_BSIZE;
    geomp->g_capacity = vdp->xdf_xdev_nblocks;
    geomp->g_intrlv = 0;
    geomp->g_rpm = 7200;
}

/*
 * Finish other initialization after we've connected to the backend.
 * Status should be XD_INIT before calling this routine.
 * On success, status will be changed to XD_CONNECTED.
 * On error, status will stay XD_INIT.
 */
static int
xdf_setstate_connected(xdf_t *vdp)
{
    dev_info_t *dip = vdp->xdf_dip;
    cmlb_geom_t pgeom;
    diskaddr_t nblocks = 0;
    char *oename, *xsname, *str;
    uint_t dinfo;

    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
    ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
    ASSERT(vdp->xdf_state == XD_INIT);

    if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
        ((oename = xvdi_get_oename(dip)) == NULL))
        return (DDI_FAILURE);

    /* Determine if feature barrier is supported by backend */
    if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
        cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier",
            vdp->xdf_addr);

    /*
     * Probe backend.  Read the device size into xdf_xdev_nblocks
     * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
     * flags in xdf_dinfo.  If the emulated device type is "cdrom",
     * we always set VDISK_CDROM, regardless of whether it's present in
     * the xenbus info parameter.
     */
    if (xenbus_gather(XBT_NULL, oename,
        XBP_SECTORS, "%"SCNu64, &nblocks,
        XBP_INFO, "%u", &dinfo,
        NULL) != 0) {
        cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
            "cannot read backend info", vdp->xdf_addr);
        return (DDI_FAILURE);
    }
    if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
        cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
            vdp->xdf_addr);
        return (DDI_FAILURE);
    }
    if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
        dinfo |= VDISK_CDROM;
    strfree(str);

    vdp->xdf_xdev_nblocks = nblocks;
#ifdef _ILP32
    if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
        cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
            "backend disk device too large with %llu blocks for"
            " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
        xvdi_fatal_error(dip, EFBIG, "reading backend info");
        return (DDI_FAILURE);
    }
#endif

    /*
     * If the physical geometry for a fixed disk has been explicitly
     * set then make sure that the specified physical geometry isn't
     * larger than the device we connected to.
     */
    if (vdp->xdf_pgeom_fixed &&
        (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
        cmn_err(CE_WARN,
            "xdf@%s: connect failed, fixed geometry too large",
            vdp->xdf_addr);
        return (DDI_FAILURE);
    }

    vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);

    /* mark vbd as ready for I/O */
    mutex_enter(&vdp->xdf_dev_lk);
    xdf_set_state(vdp, XD_CONNECTED);

    /* check if the cmlb label should be updated */
    xdf_synthetic_pgeom(dip, &pgeom);
    if ((vdp->xdf_dinfo != dinfo) ||
        (!vdp->xdf_pgeom_fixed &&
        (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
        vdp->xdf_cmbl_reattach = B_TRUE;

        vdp->xdf_dinfo = dinfo;
        if (!vdp->xdf_pgeom_fixed)
            vdp->xdf_pgeom = pgeom;
    }

    if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
        if (vdp->xdf_xdev_nblocks == 0) {
            vdp->xdf_mstate = DKIO_EJECTED;
            cv_broadcast(&vdp->xdf_mstate_cv);
        } else {
            vdp->xdf_mstate = DKIO_INSERTED;
            cv_broadcast(&vdp->xdf_mstate_cv);
        }
    } else {
        if (vdp->xdf_mstate != DKIO_NONE) {
            vdp->xdf_mstate = DKIO_NONE;
            cv_broadcast(&vdp->xdf_mstate_cv);
        }
    }

    mutex_exit(&vdp->xdf_dev_lk);

    cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
        (uint64_t)vdp->xdf_xdev_nblocks);

    /* Restart any currently queued up io */
    xdf_io_start(vdp);

    /*
     * To get to the ready state we have to do IO to the backend device,
     * but we can't initiate IO from the other end change callback thread
     * (which is the current context we're executing in.)  This is because
     * if the other end disconnects while we're doing IO from the callback
     * thread, then we can't receive that disconnect event and we hang
     * waiting for an IO that can never complete.
     */
    (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
        DDI_SLEEP);

    (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
    return (DDI_SUCCESS);
}

/*ARGSUSED*/
static void
xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
    XenbusState new_state = *(XenbusState *)impl_data;
    xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

    DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
        vdp->xdf_addr, new_state));

    mutex_enter(&vdp->xdf_cb_lk);

    /* We assume that this callback is single threaded */
    ASSERT(vdp->xdf_oe_change_thread == NULL);
    DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);

    /* ignore any backend state changes if we're suspending/suspended */
    if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
        DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
        mutex_exit(&vdp->xdf_cb_lk);
        return;
    }

    switch (new_state) {
    case XenbusStateUnknown:
    case XenbusStateInitialising:
    case XenbusStateInitWait:
    case XenbusStateInitialised:
        if (vdp->xdf_state == XD_INIT)
            break;

        xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
        if (xdf_setstate_init(vdp) != DDI_SUCCESS)
            break;
        ASSERT(vdp->xdf_state == XD_INIT);
        break;

    case XenbusStateConnected:
        if ((vdp->xdf_state == XD_CONNECTED) ||
            (vdp->xdf_state == XD_READY))
            break;

        if (vdp->xdf_state != XD_INIT) {
            xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
            if (xdf_setstate_init(vdp) != DDI_SUCCESS)
                break;
            ASSERT(vdp->xdf_state == XD_INIT);
        }

        if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
            xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
            break;
        }
        ASSERT(vdp->xdf_state == XD_CONNECTED);
        break;

    case XenbusStateClosing:
        if (xdf_isopen(vdp, -1)) {
            cmn_err(CE_NOTE,
                "xdf@%s: hot-unplug failed, still in use",
                vdp->xdf_addr);
            break;
        }
        /*FALLTHROUGH*/
    case XenbusStateClosed:
        xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
        break;
    }

    /* notify anybody waiting for oe state change */
    cv_broadcast(&vdp->xdf_dev_cv);
    DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
    mutex_exit(&vdp->xdf_cb_lk);
}

static int
xdf_connect_locked(xdf_t *vdp, boolean_t wait)
{
    int rv;

    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

    /* we can't connect once we're in the closed state */
    if (vdp->xdf_state == XD_CLOSED)
        return (XD_CLOSED);

    vdp->xdf_connect_req++;
    while (vdp->xdf_state != XD_READY) {
        mutex_exit(&vdp->xdf_dev_lk);
        if (vdp->xdf_state == XD_UNKNOWN)
            (void) xdf_setstate_init(vdp);
        mutex_enter(&vdp->xdf_dev_lk);

        if (!wait || (vdp->xdf_state == XD_READY))
            goto out;

        mutex_exit(&vdp->xdf_cb_lk);
        rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
        mutex_exit(&vdp->xdf_dev_lk);
        mutex_enter(&vdp->xdf_cb_lk);
        mutex_enter(&vdp->xdf_dev_lk);
        if (rv == 0)
            goto out;
    }

out:
    ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
    ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

    /* Try to lock the media */
    (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);

    vdp->xdf_connect_req--;
    return (vdp->xdf_state);
}

static uint_t
xdf_iorestart(caddr_t arg)
1996 { 1997 xdf_t *vdp = (xdf_t *)arg; 1998 1999 ASSERT(vdp != NULL); 2000 2001 mutex_enter(&vdp->xdf_dev_lk); 2002 ASSERT(ISDMACBON(vdp)); 2003 SETDMACBOFF(vdp); 2004 mutex_exit(&vdp->xdf_dev_lk); 2005 2006 xdf_io_start(vdp); 2007 2008 return (DDI_INTR_CLAIMED); 2009 } 2010 2011 #if defined(XPV_HVM_DRIVER) 2012 2013 typedef struct xdf_hvm_entry { 2014 list_node_t xdf_he_list; 2015 char *xdf_he_path; 2016 dev_info_t *xdf_he_dip; 2017 } xdf_hvm_entry_t; 2018 2019 static list_t xdf_hvm_list; 2020 static kmutex_t xdf_hvm_list_lock; 2021 2022 static xdf_hvm_entry_t * 2023 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2024 { 2025 xdf_hvm_entry_t *i; 2026 2027 ASSERT((path != NULL) || (dip != NULL)); 2028 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2029 2030 i = list_head(&xdf_hvm_list); 2031 while (i != NULL) { 2032 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2033 i = list_next(&xdf_hvm_list, i); 2034 continue; 2035 } 2036 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2037 i = list_next(&xdf_hvm_list, i); 2038 continue; 2039 } 2040 break; 2041 } 2042 return (i); 2043 } 2044 2045 dev_info_t * 2046 xdf_hvm_hold(const char *path) 2047 { 2048 xdf_hvm_entry_t *i; 2049 dev_info_t *dip; 2050 2051 mutex_enter(&xdf_hvm_list_lock); 2052 i = i_xdf_hvm_find(path, NULL); 2053 if (i == NULL) { 2054 mutex_exit(&xdf_hvm_list_lock); 2055 return (NULL); 2056 } 2057 ndi_hold_devi(dip = i->xdf_he_dip); 2058 mutex_exit(&xdf_hvm_list_lock); 2059 return (dip); 2060 } 2061 2062 static void 2063 xdf_hvm_add(dev_info_t *dip) 2064 { 2065 xdf_hvm_entry_t *i; 2066 char *path; 2067 2068 /* figure out the path for the dip */ 2069 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2070 (void) ddi_pathname(dip, path); 2071 2072 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2073 i->xdf_he_dip = dip; 2074 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2075 2076 mutex_enter(&xdf_hvm_list_lock); 2077 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2078 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2079 list_insert_head(&xdf_hvm_list, i); 2080 mutex_exit(&xdf_hvm_list_lock); 2081 2082 kmem_free(path, MAXPATHLEN); 2083 } 2084 2085 static void 2086 xdf_hvm_rm(dev_info_t *dip) 2087 { 2088 xdf_hvm_entry_t *i; 2089 2090 mutex_enter(&xdf_hvm_list_lock); 2091 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2092 list_remove(&xdf_hvm_list, i); 2093 mutex_exit(&xdf_hvm_list_lock); 2094 2095 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2096 kmem_free(i, sizeof (*i)); 2097 } 2098 2099 static void 2100 xdf_hvm_init(void) 2101 { 2102 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2103 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2104 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2105 } 2106 2107 static void 2108 xdf_hvm_fini(void) 2109 { 2110 ASSERT(list_head(&xdf_hvm_list) == NULL); 2111 list_destroy(&xdf_hvm_list); 2112 mutex_destroy(&xdf_hvm_list_lock); 2113 } 2114 2115 boolean_t 2116 xdf_hvm_connect(dev_info_t *dip) 2117 { 2118 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2119 char *oename, *str; 2120 int rv; 2121 2122 mutex_enter(&vdp->xdf_cb_lk); 2123 mutex_enter(&vdp->xdf_dev_lk); 2124 2125 /* 2126 * Before trying to establish a connection we need to wait for the 2127 * backend hotplug scripts to have run. Once they are run the 2128 * "<oename>/hotplug-status" property will be set to "connected". 2129 */ 2130 for (;;) { 2131 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2132 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2133 2134 /* 2135 * Get the xenbus path to the backend device.
Note that 2136 * we can't cache this path (and we look it up on each pass 2137 * through this loop) because it could change during 2138 * suspend, resume, and migration operations. 2139 */ 2140 if ((oename = xvdi_get_oename(dip)) == NULL) { 2141 mutex_exit(&vdp->xdf_dev_lk); 2142 mutex_exit(&vdp->xdf_cb_lk); 2143 return (B_FALSE); 2144 } 2145 2146 str = NULL; 2147 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2148 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2149 break; 2150 2151 if (str != NULL) 2152 strfree(str); 2153 2154 /* wait for an update to "<oename>/hotplug-status" */ 2155 mutex_exit(&vdp->xdf_dev_lk); 2156 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2157 /* we got interrupted by a signal */ 2158 mutex_exit(&vdp->xdf_cb_lk); 2159 return (B_FALSE); 2160 } 2161 mutex_enter(&vdp->xdf_dev_lk); 2162 } 2163 2164 /* Good news. The backend hotplug scripts have been run. */ 2165 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2166 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2167 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2168 strfree(str); 2169 2170 /* 2171 * If we're emulating a cd device and if the backend doesn't support 2172 * media request operations, then we're not going to bother trying 2173 * to establish a connection for a couple of reasons. First off, media 2174 * request support is required to support operations like eject and 2175 * media locking. Second, other backend platforms like Linux don't 2176 * support hvm pv cdrom access. They don't even have a backend pv 2177 * driver for cdrom device nodes, so we don't want to block forever 2178 * waiting for a connection to a backend driver that doesn't exist. 2179 */ 2180 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2181 mutex_exit(&vdp->xdf_dev_lk); 2182 mutex_exit(&vdp->xdf_cb_lk); 2183 return (B_FALSE); 2184 } 2185 2186 rv = xdf_connect_locked(vdp, B_TRUE); 2187 mutex_exit(&vdp->xdf_dev_lk); 2188 mutex_exit(&vdp->xdf_cb_lk); 2189 2190 return ((rv == XD_READY) ? B_TRUE : B_FALSE); 2191 } 2192 2193 int 2194 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2195 { 2196 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2197 2198 /* sanity check the requested physical geometry */ 2199 mutex_enter(&vdp->xdf_dev_lk); 2200 if ((geomp->g_secsize != XB_BSIZE) || 2201 (geomp->g_capacity == 0)) { 2202 mutex_exit(&vdp->xdf_dev_lk); 2203 return (EINVAL); 2204 } 2205 2206 /* 2207 * If we've already connected to the backend device then make sure 2208 * we're not defining a physical geometry larger than our backend 2209 * device.
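 * For example (hypothetical numbers): a 2 GB backend exports
 * 2147483648 / 512 = 4194304 XB_BSIZE blocks, so a caller-supplied
 * geometry is only accepted if g_capacity <= xdf_xdev_nblocks
 * (4194304 in this sketch).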
2210 */ 2211 if ((vdp->xdf_xdev_nblocks != 0) && 2212 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2213 mutex_exit(&vdp->xdf_dev_lk); 2214 return (EINVAL); 2215 } 2216 2217 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2218 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2219 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2220 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2221 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2222 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2223 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2224 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2225 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2226 2227 vdp->xdf_pgeom_fixed = B_TRUE; 2228 mutex_exit(&vdp->xdf_dev_lk); 2229 2230 /* force a re-validation */ 2231 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2232 2233 return (0); 2234 } 2235 2236 boolean_t 2237 xdf_is_cd(dev_info_t *dip) 2238 { 2239 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2240 boolean_t rv; 2241 2242 mutex_enter(&vdp->xdf_cb_lk); 2243 rv = XD_IS_CD(vdp); 2244 mutex_exit(&vdp->xdf_cb_lk); 2245 return (rv); 2246 } 2247 2248 boolean_t 2249 xdf_is_rm(dev_info_t *dip) 2250 { 2251 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2252 boolean_t rv; 2253 2254 mutex_enter(&vdp->xdf_cb_lk); 2255 rv = XD_IS_RM(vdp); 2256 mutex_exit(&vdp->xdf_cb_lk); 2257 return (rv); 2258 } 2259 2260 boolean_t 2261 xdf_media_req_supported(dev_info_t *dip) 2262 { 2263 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2264 boolean_t rv; 2265 2266 mutex_enter(&vdp->xdf_cb_lk); 2267 rv = vdp->xdf_media_req_supported; 2268 mutex_exit(&vdp->xdf_cb_lk); 2269 return (rv); 2270 } 2271 2272 #endif /* XPV_HVM_DRIVER */ 2273 2274 static int 2275 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2276 { 2277 xdf_t *vdp; 2278 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2279 2280 if (vdp == NULL) 2281 return (ENXIO); 2282 2283 mutex_enter(&vdp->xdf_dev_lk); 2284 *capp = vdp->xdf_pgeom.g_capacity; 2285 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2286 mutex_exit(&vdp->xdf_dev_lk); 2287 return (0); 2288 } 2289 2290 static int 2291 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2292 { 2293 xdf_t *vdp; 2294 2295 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2296 return (ENXIO); 2297 *geomp = vdp->xdf_pgeom; 2298 return (0); 2299 } 2300 2301 /* 2302 * No real HBA, no geometry available from it 2303 */ 2304 /*ARGSUSED*/ 2305 static int 2306 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2307 { 2308 return (EINVAL); 2309 } 2310 2311 static int 2312 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2313 { 2314 xdf_t *vdp; 2315 2316 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2317 return (ENXIO); 2318 2319 if (XD_IS_RO(vdp)) 2320 tgattributep->media_is_writable = 0; 2321 else 2322 tgattributep->media_is_writable = 1; 2323 return (0); 2324 } 2325 2326 /* ARGSUSED3 */ 2327 int 2328 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2329 { 2330 switch (cmd) { 2331 case TG_GETPHYGEOM: 2332 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2333 case TG_GETVIRTGEOM: 2334 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2335 case TG_GETCAPACITY: 2336 return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2337 case TG_GETBLOCKSIZE: 2338 *(uint32_t *)arg = XB_BSIZE; 2339 return (0); 2340 case TG_GETATTR: 2341 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2342 default: 2343 return (ENOTTY); 2344 } 2345 } 2346 2347 /* ARGSUSED5 */ 2348 int 2349 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 
2350 diskaddr_t start, size_t reqlen, void *tg_cookie) 2351 { 2352 xdf_t *vdp; 2353 struct buf *bp; 2354 int err = 0; 2355 2356 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2357 2358 /* We don't allow IO from the oe_change callback thread */ 2359 ASSERT(curthread != vdp->xdf_oe_change_thread); 2360 2361 if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2362 return (EINVAL); 2363 2364 bp = getrbuf(KM_SLEEP); 2365 if (cmd == TG_READ) 2366 bp->b_flags = B_BUSY | B_READ; 2367 else 2368 bp->b_flags = B_BUSY | B_WRITE; 2369 bp->b_un.b_addr = bufp; 2370 bp->b_bcount = reqlen; 2371 bp->b_blkno = start; 2372 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2373 2374 mutex_enter(&vdp->xdf_dev_lk); 2375 xdf_bp_push(vdp, bp); 2376 mutex_exit(&vdp->xdf_dev_lk); 2377 xdf_io_start(vdp); 2378 if (curthread == vdp->xdf_ready_tq_thread) 2379 (void) xdf_ring_drain(vdp); 2380 err = biowait(bp); 2381 ASSERT(bp->b_flags & B_DONE); 2382 freerbuf(bp); 2383 return (err); 2384 } 2385 2386 /* 2387 * Lock the current media. Set the media state to "lock". 2388 * (Media locks are only respected by the backend driver.) 2389 */ 2390 static int 2391 xdf_ioctl_mlock(xdf_t *vdp) 2392 { 2393 int rv; 2394 mutex_enter(&vdp->xdf_cb_lk); 2395 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2396 mutex_exit(&vdp->xdf_cb_lk); 2397 return (rv); 2398 } 2399 2400 /* 2401 * Release a media lock. Set the media state to "none". 2402 */ 2403 static int 2404 xdf_ioctl_munlock(xdf_t *vdp) 2405 { 2406 int rv; 2407 mutex_enter(&vdp->xdf_cb_lk); 2408 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2409 mutex_exit(&vdp->xdf_cb_lk); 2410 return (rv); 2411 } 2412 2413 /* 2414 * Eject the current media. Ignores any media locks. (Media locks 2415 * are only for the benefit of the backend.) 2416 */ 2417 static int 2418 xdf_ioctl_eject(xdf_t *vdp) 2419 { 2420 int rv; 2421 2422 mutex_enter(&vdp->xdf_cb_lk); 2423 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2424 mutex_exit(&vdp->xdf_cb_lk); 2425 return (rv); 2426 } 2427 2428 /* 2429 * We've set the media request xenbus parameter to eject, so now 2430 * disconnect from the backend, wait for the backend to clear 2431 * the media request xenbus parameter, and then we can reconnect 2432 * to the backend. 2433 */ 2434 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2435 mutex_enter(&vdp->xdf_dev_lk); 2436 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2437 mutex_exit(&vdp->xdf_dev_lk); 2438 mutex_exit(&vdp->xdf_cb_lk); 2439 return (EIO); 2440 } 2441 mutex_exit(&vdp->xdf_dev_lk); 2442 mutex_exit(&vdp->xdf_cb_lk); 2443 return (0); 2444 } 2445 2446 /* 2447 * Watch for media state changes. This can be an insertion of a device 2448 * (triggered by a 'xm block-configure' request in another domain) or 2449 * the ejection of a device (triggered by a local "eject" operation). 2450 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
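 *
 * As a minimal user-level sketch (hypothetical device path, error
 * handling omitted), the usual DKIOCSTATE loop looks roughly like this;
 * the ioctl blocks until the media state differs from the state passed
 * in and then returns the new state:
 *
 *	int fd = open("/dev/rdsk/c0d0s2", O_RDONLY | O_NDELAY);
 *	enum dkio_state state = DKIO_NONE;
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED)
 *			...media is now present...
 *		else if (state == DKIO_EJECTED)
 *			...media has been removed...
 *	}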
2451 */ 2452 static int 2453 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2454 { 2455 enum dkio_state prev_state; 2456 2457 mutex_enter(&vdp->xdf_cb_lk); 2458 prev_state = vdp->xdf_mstate; 2459 2460 if (vdp->xdf_mstate == mstate) { 2461 while (vdp->xdf_mstate == prev_state) { 2462 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2463 &vdp->xdf_cb_lk) == 0) { 2464 mutex_exit(&vdp->xdf_cb_lk); 2465 return (EINTR); 2466 } 2467 } 2468 } 2469 2470 if ((prev_state != DKIO_INSERTED) && 2471 (vdp->xdf_mstate == DKIO_INSERTED)) { 2472 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2473 mutex_exit(&vdp->xdf_cb_lk); 2474 return (0); 2475 } 2476 2477 mutex_exit(&vdp->xdf_cb_lk); 2478 return (0); 2479 } 2480 2481 /*ARGSUSED*/ 2482 static int 2483 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2484 int *rvalp) 2485 { 2486 minor_t minor = getminor(dev); 2487 int part = XDF_PART(minor); 2488 xdf_t *vdp; 2489 int rv; 2490 2491 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2492 (!xdf_isopen(vdp, part))) 2493 return (ENXIO); 2494 2495 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2496 vdp->xdf_addr, cmd, cmd)); 2497 2498 switch (cmd) { 2499 default: 2500 return (ENOTTY); 2501 case DKIOCG_PHYGEOM: 2502 case DKIOCG_VIRTGEOM: 2503 case DKIOCGGEOM: 2504 case DKIOCSGEOM: 2505 case DKIOCGAPART: 2506 case DKIOCSAPART: 2507 case DKIOCGVTOC: 2508 case DKIOCSVTOC: 2509 case DKIOCPARTINFO: 2510 case DKIOCGEXTVTOC: 2511 case DKIOCSEXTVTOC: 2512 case DKIOCEXTPARTINFO: 2513 case DKIOCGMBOOT: 2514 case DKIOCSMBOOT: 2515 case DKIOCGETEFI: 2516 case DKIOCSETEFI: 2517 case DKIOCPARTITION: 2518 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2519 rvalp, NULL)); 2520 case FDEJECT: 2521 case DKIOCEJECT: 2522 case CDROMEJECT: 2523 return (xdf_ioctl_eject(vdp)); 2524 case DKIOCLOCK: 2525 return (xdf_ioctl_mlock(vdp)); 2526 case DKIOCUNLOCK: 2527 return (xdf_ioctl_munlock(vdp)); 2528 case CDROMREADOFFSET: { 2529 int offset = 0; 2530 if (!XD_IS_CD(vdp)) 2531 return (ENOTTY); 2532 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2533 return (EFAULT); 2534 return (0); 2535 } 2536 case DKIOCGMEDIAINFO: { 2537 struct dk_minfo media_info; 2538 2539 media_info.dki_lbsize = DEV_BSIZE; 2540 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2541 if (XD_IS_CD(vdp)) 2542 media_info.dki_media_type = DK_CDROM; 2543 else 2544 media_info.dki_media_type = DK_FIXED_DISK; 2545 2546 if (ddi_copyout(&media_info, (void *)arg, 2547 sizeof (struct dk_minfo), mode)) 2548 return (EFAULT); 2549 return (0); 2550 } 2551 case DKIOCINFO: { 2552 struct dk_cinfo info; 2553 2554 /* controller information */ 2555 if (XD_IS_CD(vdp)) 2556 info.dki_ctype = DKC_CDROM; 2557 else 2558 info.dki_ctype = DKC_VBD; 2559 2560 info.dki_cnum = 0; 2561 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2562 2563 /* unit information */ 2564 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2565 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2566 info.dki_flags = DKI_FMTVOL; 2567 info.dki_partition = part; 2568 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2569 info.dki_addr = 0; 2570 info.dki_space = 0; 2571 info.dki_prio = 0; 2572 info.dki_vec = 0; 2573 2574 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2575 return (EFAULT); 2576 return (0); 2577 } 2578 case DKIOCSTATE: { 2579 enum dkio_state mstate; 2580 2581 if (ddi_copyin((void *)arg, &mstate, 2582 sizeof (mstate), mode) != 0) 2583 return (EFAULT); 2584 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2585 return (rv); 2586 mstate = 
vdp->xdf_mstate; 2587 if (ddi_copyout(&mstate, (void *)arg, 2588 sizeof (mstate), mode) != 0) 2589 return (EFAULT); 2590 return (0); 2591 } 2592 case DKIOCREMOVABLE: { 2593 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2594 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2595 return (EFAULT); 2596 return (0); 2597 } 2598 case DKIOCGETWCE: { 2599 int i = BOOLEAN2VOID(vdp->xdf_wce); 2600 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode)) 2601 return (EFAULT); 2602 return (0); 2603 } 2604 case DKIOCSETWCE: { 2605 int i; 2606 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2607 return (EFAULT); 2608 vdp->xdf_wce = VOID2BOOLEAN(i); 2609 return (0); 2610 } 2611 case DKIOCFLUSHWRITECACHE: { 2612 struct dk_callback *dkc = (struct dk_callback *)arg; 2613 2614 if (vdp->xdf_flush_supported) { 2615 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2616 NULL, 0, 0, (void *)dev); 2617 } else if (vdp->xdf_feature_barrier && 2618 !xdf_barrier_flush_disable) { 2619 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2620 vdp->xdf_cache_flush_block, xdf_flush_block, 2621 DEV_BSIZE, (void *)dev); 2622 } else { 2623 return (ENOTTY); 2624 } 2625 if ((mode & FKIOCTL) && (dkc != NULL) && 2626 (dkc->dkc_callback != NULL)) { 2627 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2628 /* need to return 0 after calling callback */ 2629 rv = 0; 2630 } 2631 return (rv); 2632 } 2633 } 2634 /*NOTREACHED*/ 2635 } 2636 2637 static int 2638 xdf_strategy(struct buf *bp) 2639 { 2640 xdf_t *vdp; 2641 minor_t minor; 2642 diskaddr_t p_blkct, p_blkst; 2643 ulong_t nblks; 2644 int part; 2645 2646 minor = getminor(bp->b_edev); 2647 part = XDF_PART(minor); 2648 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2649 2650 mutex_enter(&vdp->xdf_dev_lk); 2651 if (!xdf_isopen(vdp, part)) { 2652 mutex_exit(&vdp->xdf_dev_lk); 2653 xdf_io_err(bp, ENXIO, 0); 2654 return (0); 2655 } 2656 2657 /* We don't allow IO from the oe_change callback thread */ 2658 ASSERT(curthread != vdp->xdf_oe_change_thread); 2659 2660 /* Check for writes to a read only device */ 2661 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2662 mutex_exit(&vdp->xdf_dev_lk); 2663 xdf_io_err(bp, EROFS, 0); 2664 return (0); 2665 } 2666 2667 /* Check if this I/O is accessing a partition or the entire disk */ 2668 if ((long)bp->b_private == XB_SLICE_NONE) { 2669 /* This I/O is using an absolute offset */ 2670 p_blkct = vdp->xdf_xdev_nblocks; 2671 p_blkst = 0; 2672 } else { 2673 /* This I/O is using a partition relative offset */ 2674 mutex_exit(&vdp->xdf_dev_lk); 2675 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2676 &p_blkst, NULL, NULL, NULL)) { 2677 xdf_io_err(bp, ENXIO, 0); 2678 return (0); 2679 } 2680 mutex_enter(&vdp->xdf_dev_lk); 2681 } 2682 2683 /* check for a starting block beyond the disk or partition limit */ 2684 if (bp->b_blkno > p_blkct) { 2685 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2686 vdp->xdf_addr, (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); 2687 xdf_io_err(bp, EINVAL, 0); 2688 return (0); 2689 } 2690 2691 /* Legacy: don't set the error flag in this case */ 2692 if (bp->b_blkno == p_blkct) { 2693 bp->b_resid = bp->b_bcount; 2694 biodone(bp); 2695 return (0); 2696 } 2697 2698 /* sanitize the input buf */ 2699 bioerror(bp, 0); 2700 bp->b_resid = 0; 2701 bp->av_back = bp->av_forw = NULL; 2702 2703 /* Adjust for partial transfer, this will result in an error later */ 2704 nblks = bp->b_bcount >> XB_BSHIFT; 2705 if ((bp->b_blkno + nblks) > p_blkct) { 2706 bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; 2707 bp->b_bcount -= bp->b_resid; 2708 } 2709 2710
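	/*
	 * A worked example with hypothetical numbers: with XB_BSHIFT == 9,
	 * a b_bcount of 8192 bytes is nblks = 16 blocks.  If only 10 of
	 * those blocks fit below p_blkct, then
	 * b_resid = (16 - 10) << 9 = 3072 and b_bcount becomes
	 * 8192 - 3072 = 5120, so the 6 blocks past the limit are never
	 * issued to the backend and the caller sees a short transfer.
	 */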
DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2711 vdp->xdf_addr, (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); 2712 2713 /* Fix up the buf struct */ 2714 bp->b_flags |= B_BUSY; 2715 bp->b_private = (void *)(uintptr_t)p_blkst; 2716 2717 xdf_bp_push(vdp, bp); 2718 mutex_exit(&vdp->xdf_dev_lk); 2719 xdf_io_start(vdp); 2720 if (do_polled_io) 2721 (void) xdf_ring_drain(vdp); 2722 return (0); 2723 } 2724 2725 /*ARGSUSED*/ 2726 static int 2727 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2728 { 2729 xdf_t *vdp; 2730 minor_t minor; 2731 diskaddr_t p_blkcnt; 2732 int part; 2733 2734 minor = getminor(dev); 2735 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2736 return (ENXIO); 2737 2738 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2739 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2740 2741 part = XDF_PART(minor); 2742 if (!xdf_isopen(vdp, part)) 2743 return (ENXIO); 2744 2745 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2746 NULL, NULL, NULL, NULL)) 2747 return (ENXIO); 2748 2749 if (U_INVAL(uiop)) 2750 return (EINVAL); 2751 2752 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2753 } 2754 2755 /*ARGSUSED*/ 2756 static int 2757 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2758 { 2759 xdf_t *vdp; 2760 minor_t minor; 2761 diskaddr_t p_blkcnt; 2762 int part; 2763 2764 minor = getminor(dev); 2765 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2766 return (ENXIO); 2767 2768 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2769 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2770 2771 part = XDF_PART(minor); 2772 if (!xdf_isopen(vdp, part)) 2773 return (ENXIO); 2774 2775 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2776 NULL, NULL, NULL, NULL)) 2777 return (ENXIO); 2778 2779 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 2780 return (ENOSPC); 2781 2782 if (U_INVAL(uiop)) 2783 return (EINVAL); 2784 2785 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2786 } 2787 2788 /*ARGSUSED*/ 2789 static int 2790 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2791 { 2792 xdf_t *vdp; 2793 minor_t minor; 2794 struct uio *uiop = aiop->aio_uio; 2795 diskaddr_t p_blkcnt; 2796 int part; 2797 2798 minor = getminor(dev); 2799 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2800 return (ENXIO); 2801 2802 part = XDF_PART(minor); 2803 if (!xdf_isopen(vdp, part)) 2804 return (ENXIO); 2805 2806 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2807 NULL, NULL, NULL, NULL)) 2808 return (ENXIO); 2809 2810 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 2811 return (ENOSPC); 2812 2813 if (U_INVAL(uiop)) 2814 return (EINVAL); 2815 2816 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2817 } 2818 2819 /*ARGSUSED*/ 2820 static int 2821 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2822 { 2823 xdf_t *vdp; 2824 minor_t minor; 2825 struct uio *uiop = aiop->aio_uio; 2826 diskaddr_t p_blkcnt; 2827 int part; 2828 2829 minor = getminor(dev); 2830 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2831 return (ENXIO); 2832 2833 part = XDF_PART(minor); 2834 if (!xdf_isopen(vdp, part)) 2835 return (ENXIO); 2836 2837 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2838 NULL, NULL, NULL, NULL)) 2839 return (ENXIO); 2840 2841 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) 2842 return (ENOSPC); 2843 2844 if (U_INVAL(uiop)) 2845 return (EINVAL); 2846 2847 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2848 } 2849 2850 static int 
2851 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2852 { 2853 struct buf dumpbuf, *dbp = &dumpbuf; 2854 xdf_t *vdp; 2855 minor_t minor; 2856 int err = 0; 2857 int part; 2858 diskaddr_t p_blkcnt, p_blkst; 2859 2860 minor = getminor(dev); 2861 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2862 return (ENXIO); 2863 2864 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2865 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2866 2867 /* We don't allow IO from the oe_change callback thread */ 2868 ASSERT(curthread != vdp->xdf_oe_change_thread); 2869 2870 part = XDF_PART(minor); 2871 if (!xdf_isopen(vdp, part)) 2872 return (ENXIO); 2873 2874 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 2875 NULL, NULL, NULL)) 2876 return (ENXIO); 2877 2878 if ((blkno + nblk) > p_blkcnt) { 2879 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 2880 vdp->xdf_addr, blkno + nblk, (uint64_t)p_blkcnt); 2881 return (EINVAL); 2882 } 2883 2884 bioinit(dbp); 2885 dbp->b_flags = B_BUSY; 2886 dbp->b_un.b_addr = addr; 2887 dbp->b_bcount = nblk << DEV_BSHIFT; 2888 dbp->b_blkno = blkno; 2889 dbp->b_edev = dev; 2890 dbp->b_private = (void *)(uintptr_t)p_blkst; 2891 2892 mutex_enter(&vdp->xdf_dev_lk); 2893 xdf_bp_push(vdp, dbp); 2894 mutex_exit(&vdp->xdf_dev_lk); 2895 xdf_io_start(vdp); 2896 err = xdf_ring_drain(vdp); 2897 biofini(dbp); 2898 return (err); 2899 } 2900 2901 /*ARGSUSED*/ 2902 static int 2903 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 2904 { 2905 minor_t minor; 2906 xdf_t *vdp; 2907 int part; 2908 ulong_t parbit; 2909 2910 minor = getminor(dev); 2911 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2912 return (ENXIO); 2913 2914 mutex_enter(&vdp->xdf_dev_lk); 2915 part = XDF_PART(minor); 2916 if (!xdf_isopen(vdp, part)) { 2917 mutex_exit(&vdp->xdf_dev_lk); 2918 return (ENXIO); 2919 } 2920 parbit = 1 << part; 2921 2922 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 2923 if (otyp == OTYP_LYR) { 2924 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 2925 if (--vdp->xdf_vd_lyropen[part] == 0) 2926 vdp->xdf_vd_open[otyp] &= ~parbit; 2927 } else { 2928 vdp->xdf_vd_open[otyp] &= ~parbit; 2929 } 2930 vdp->xdf_vd_exclopen &= ~parbit; 2931 2932 mutex_exit(&vdp->xdf_dev_lk); 2933 return (0); 2934 } 2935 2936 static int 2937 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 2938 { 2939 minor_t minor; 2940 xdf_t *vdp; 2941 int part; 2942 ulong_t parbit; 2943 diskaddr_t p_blkct = 0; 2944 boolean_t firstopen; 2945 boolean_t nodelay; 2946 2947 minor = getminor(*devp); 2948 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2949 return (ENXIO); 2950 2951 nodelay = (flag & (FNDELAY | FNONBLOCK)); 2952 2953 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 2954 2955 /* do cv_wait until connected or failed */ 2956 mutex_enter(&vdp->xdf_cb_lk); 2957 mutex_enter(&vdp->xdf_dev_lk); 2958 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 2959 mutex_exit(&vdp->xdf_dev_lk); 2960 mutex_exit(&vdp->xdf_cb_lk); 2961 return (ENXIO); 2962 } 2963 mutex_exit(&vdp->xdf_cb_lk); 2964 2965 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 2966 mutex_exit(&vdp->xdf_dev_lk); 2967 return (EROFS); 2968 } 2969 2970 part = XDF_PART(minor); 2971 parbit = 1 << part; 2972 if ((vdp->xdf_vd_exclopen & parbit) || 2973 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 2974 mutex_exit(&vdp->xdf_dev_lk); 2975 return (EBUSY); 2976 } 2977 2978 /* are we the first one to open this node? 
*/ 2979 firstopen = !xdf_isopen(vdp, -1); 2980 2981 if (otyp == OTYP_LYR) 2982 vdp->xdf_vd_lyropen[part]++; 2983 2984 vdp->xdf_vd_open[otyp] |= parbit; 2985 2986 if (flag & FEXCL) 2987 vdp->xdf_vd_exclopen |= parbit; 2988 2989 mutex_exit(&vdp->xdf_dev_lk); 2990 2991 /* force a re-validation */ 2992 if (firstopen) 2993 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2994 2995 /* If this is a non-blocking open then we're done */ 2996 if (nodelay) 2997 return (0); 2998 2999 /* 3000 * This is a blocking open, so we require: 3001 * - that the disk have a valid label on it 3002 * - that the size of the partition that we're opening is non-zero 3003 */ 3004 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3005 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3006 (void) xdf_close(*devp, flag, otyp, credp); 3007 return (ENXIO); 3008 } 3009 3010 return (0); 3011 } 3012 3013 /*ARGSUSED*/ 3014 static void 3015 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3016 { 3017 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 3018 cv_broadcast(&vdp->xdf_hp_status_cv); 3019 } 3020 3021 static int 3022 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3023 char *name, caddr_t valuep, int *lengthp) 3024 { 3025 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3026 3027 /* 3028 * Sanity check that if a dev_t or dip were specified that they 3029 * correspond to this device driver. On debug kernels we'll 3030 * panic and on non-debug kernels we'll return failure. 3031 */ 3032 ASSERT(ddi_driver_major(dip) == xdf_major); 3033 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3034 if ((ddi_driver_major(dip) != xdf_major) || 3035 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3036 return (DDI_PROP_NOT_FOUND); 3037 3038 if (vdp == NULL) 3039 return (ddi_prop_op(dev, dip, prop_op, flags, 3040 name, valuep, lengthp)); 3041 3042 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3043 dev, dip, prop_op, flags, name, valuep, lengthp, 3044 XDF_PART(getminor(dev)), NULL)); 3045 } 3046 3047 /*ARGSUSED*/ 3048 static int 3049 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3050 { 3051 int instance = XDF_INST(getminor((dev_t)arg)); 3052 xdf_t *vbdp; 3053 3054 switch (cmd) { 3055 case DDI_INFO_DEVT2DEVINFO: 3056 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3057 *rp = NULL; 3058 return (DDI_FAILURE); 3059 } 3060 *rp = vbdp->xdf_dip; 3061 return (DDI_SUCCESS); 3062 3063 case DDI_INFO_DEVT2INSTANCE: 3064 *rp = (void *)(uintptr_t)instance; 3065 return (DDI_SUCCESS); 3066 3067 default: 3068 return (DDI_FAILURE); 3069 } 3070 } 3071 3072 /*ARGSUSED*/ 3073 static int 3074 xdf_resume(dev_info_t *dip) 3075 { 3076 xdf_t *vdp; 3077 char *oename; 3078 3079 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3080 goto err; 3081 3082 if (xdf_debug & SUSRES_DBG) 3083 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3084 3085 mutex_enter(&vdp->xdf_cb_lk); 3086 3087 if (xvdi_resume(dip) != DDI_SUCCESS) { 3088 mutex_exit(&vdp->xdf_cb_lk); 3089 goto err; 3090 } 3091 3092 if (((oename = xvdi_get_oename(dip)) == NULL) || 3093 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3094 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3095 mutex_exit(&vdp->xdf_cb_lk); 3096 goto err; 3097 } 3098 3099 mutex_enter(&vdp->xdf_dev_lk); 3100 ASSERT(vdp->xdf_state != XD_READY); 3101 xdf_set_state(vdp, XD_UNKNOWN); 3102 mutex_exit(&vdp->xdf_dev_lk); 3103 3104 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3105 
mutex_exit(&vdp->xdf_cb_lk); 3106 goto err; 3107 } 3108 3109 mutex_exit(&vdp->xdf_cb_lk); 3110 3111 if (xdf_debug & SUSRES_DBG) 3112 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3113 return (DDI_SUCCESS); 3114 err: 3115 if (xdf_debug & SUSRES_DBG) 3116 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3117 return (DDI_FAILURE); 3118 } 3119 3120 static int 3121 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3122 { 3123 int n, instance = ddi_get_instance(dip); 3124 ddi_iblock_cookie_t ibc, softibc; 3125 boolean_t dev_iscd = B_FALSE; 3126 xdf_t *vdp; 3127 char *oename, *xsname, *str; 3128 3129 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3130 "xdf_debug", 0)) != 0) 3131 xdf_debug = n; 3132 3133 switch (cmd) { 3134 case DDI_RESUME: 3135 return (xdf_resume(dip)); 3136 case DDI_ATTACH: 3137 break; 3138 default: 3139 return (DDI_FAILURE); 3140 } 3141 /* DDI_ATTACH */ 3142 3143 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 3144 ((oename = xvdi_get_oename(dip)) == NULL)) 3145 return (DDI_FAILURE); 3146 3147 /* 3148 * Disable auto-detach. This is necessary so that we don't get 3149 * detached while we're disconnected from the back end. 3150 */ 3151 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3152 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3153 return (DDI_FAILURE); 3154 3155 /* driver handles kernel-issued IOCTLs */ 3156 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3157 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3158 return (DDI_FAILURE); 3159 3160 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3161 return (DDI_FAILURE); 3162 3163 if (ddi_get_soft_iblock_cookie(dip, 3164 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3165 return (DDI_FAILURE); 3166 3167 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3168 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3169 ddi_get_name_addr(dip)); 3170 return (DDI_FAILURE); 3171 } 3172 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3173 dev_iscd = B_TRUE; 3174 strfree(str); 3175 3176 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3177 return (DDI_FAILURE); 3178 3179 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3180 vdp = ddi_get_soft_state(xdf_ssp, instance); 3181 ddi_set_driver_private(dip, vdp); 3182 vdp->xdf_dip = dip; 3183 vdp->xdf_addr = ddi_get_name_addr(dip); 3184 vdp->xdf_suspending = B_FALSE; 3185 vdp->xdf_media_req_supported = B_FALSE; 3186 vdp->xdf_peer = INVALID_DOMID; 3187 vdp->xdf_evtchn = INVALID_EVTCHN; 3188 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3189 offsetof(v_req_t, v_link)); 3190 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3191 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3192 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3193 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3194 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3195 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3196 vdp->xdf_cmbl_reattach = B_TRUE; 3197 if (dev_iscd) { 3198 vdp->xdf_dinfo |= VDISK_CDROM; 3199 vdp->xdf_mstate = DKIO_EJECTED; 3200 } else { 3201 vdp->xdf_mstate = DKIO_NONE; 3202 } 3203 3204 if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3205 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3206 goto errout0; 3207 3208 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3209 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3210 goto errout0; 3211 3212 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3213 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != 
DDI_SUCCESS) { 3214 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3215 ddi_get_name_addr(dip)); 3216 goto errout0; 3217 } 3218 3219 /* 3220 * Initialize the physical geometry structure. Note that currently 3221 * we don't know the size of the backend device so the number 3222 * of blocks on the device will be initialized to zero. Once 3223 * we connect to the backend device we'll update the physical 3224 * geometry to reflect the real size of the device. 3225 */ 3226 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3227 vdp->xdf_pgeom_fixed = B_FALSE; 3228 3229 /* 3230 * create default device minor nodes: non-removable disk 3231 * we will adjust the minor nodes after we are connected with the backend 3232 */ 3233 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3234 if (xdf_cmlb_attach(vdp) != 0) { 3235 cmn_err(CE_WARN, 3236 "xdf@%s: attach failed, cmlb attach failed", 3237 ddi_get_name_addr(dip)); 3238 goto errout0; 3239 } 3240 3241 /* 3242 * We ship with cache-enabled disks 3243 */ 3244 vdp->xdf_wce = B_TRUE; 3245 3246 mutex_enter(&vdp->xdf_cb_lk); 3247 /* Watch backend XenbusState change */ 3248 if (xvdi_add_event_handler(dip, 3249 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3250 mutex_exit(&vdp->xdf_cb_lk); 3251 goto errout0; 3252 } 3253 3254 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3255 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3256 ddi_get_name_addr(dip)); 3257 mutex_exit(&vdp->xdf_cb_lk); 3258 goto errout1; 3259 } 3260 mutex_exit(&vdp->xdf_cb_lk); 3261 3262 #if defined(XPV_HVM_DRIVER) 3263 3264 xdf_hvm_add(dip); 3265 3266 /* Report our version to dom0. */ 3267 if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", 3268 HVMPV_XDF_VERS)) 3269 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3270 3271 #else /* !XPV_HVM_DRIVER */ 3272 3273 /* create kstat for iostat(1M) */ 3274 if (xdf_kstat_create(dip, "xdf", instance) != 0) { 3275 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3276 ddi_get_name_addr(dip)); 3277 goto errout1; 3278 } 3279 3280 #endif /* !XPV_HVM_DRIVER */ 3281 3282 ddi_report_dev(dip); 3283 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3284 return (DDI_SUCCESS); 3285 3286 errout1: 3287 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3288 xvdi_remove_event_handler(dip, XS_OE_STATE); 3289 errout0: 3290 if (vdp->xdf_vd_lbl != NULL) { 3291 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3292 cmlb_free_handle(&vdp->xdf_vd_lbl); 3293 vdp->xdf_vd_lbl = NULL; 3294 } 3295 if (vdp->xdf_softintr_id != NULL) 3296 ddi_remove_softintr(vdp->xdf_softintr_id); 3297 xvdi_remove_xb_watch_handlers(dip); 3298 if (vdp->xdf_ready_tq != NULL) 3299 ddi_taskq_destroy(vdp->xdf_ready_tq); 3300 mutex_destroy(&vdp->xdf_cb_lk); 3301 mutex_destroy(&vdp->xdf_dev_lk); 3302 cv_destroy(&vdp->xdf_dev_cv); 3303 cv_destroy(&vdp->xdf_hp_status_cv); 3304 ddi_soft_state_free(xdf_ssp, instance); 3305 ddi_set_driver_private(dip, NULL); 3306 ddi_prop_remove_all(dip); 3307 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3308 return (DDI_FAILURE); 3309 } 3310 3311 static int 3312 xdf_suspend(dev_info_t *dip) 3313 { 3314 int instance = ddi_get_instance(dip); 3315 xdf_t *vdp; 3316 3317 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3318 return (DDI_FAILURE); 3319 3320 if (xdf_debug & SUSRES_DBG) 3321 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3322 3323 xvdi_suspend(dip); 3324 3325 mutex_enter(&vdp->xdf_cb_lk); 3326 mutex_enter(&vdp->xdf_dev_lk); 3327 3328 vdp->xdf_suspending = B_TRUE; 3329 xdf_ring_destroy(vdp); 3330 xdf_set_state(vdp, XD_SUSPEND);
3331 vdp->xdf_suspending = B_FALSE; 3332 3333 mutex_exit(&vdp->xdf_dev_lk); 3334 mutex_exit(&vdp->xdf_cb_lk); 3335 3336 if (xdf_debug & SUSRES_DBG) 3337 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3338 3339 return (DDI_SUCCESS); 3340 } 3341 3342 static int 3343 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3344 { 3345 xdf_t *vdp; 3346 int instance; 3347 3348 switch (cmd) { 3349 3350 case DDI_PM_SUSPEND: 3351 break; 3352 3353 case DDI_SUSPEND: 3354 return (xdf_suspend(dip)); 3355 3356 case DDI_DETACH: 3357 break; 3358 3359 default: 3360 return (DDI_FAILURE); 3361 } 3362 3363 instance = ddi_get_instance(dip); 3364 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3365 vdp = ddi_get_soft_state(xdf_ssp, instance); 3366 3367 if (vdp == NULL) 3368 return (DDI_FAILURE); 3369 3370 mutex_enter(&vdp->xdf_cb_lk); 3371 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3372 if (vdp->xdf_state != XD_CLOSED) { 3373 mutex_exit(&vdp->xdf_cb_lk); 3374 return (DDI_FAILURE); 3375 } 3376 mutex_exit(&vdp->xdf_cb_lk); 3377 3378 ASSERT(!ISDMACBON(vdp)); 3379 3380 #if defined(XPV_HVM_DRIVER) 3381 xdf_hvm_rm(dip); 3382 #endif /* XPV_HVM_DRIVER */ 3383 3384 if (vdp->xdf_timeout_id != 0) 3385 (void) untimeout(vdp->xdf_timeout_id); 3386 3387 xvdi_remove_event_handler(dip, XS_OE_STATE); 3388 ddi_taskq_destroy(vdp->xdf_ready_tq); 3389 3390 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3391 cmlb_free_handle(&vdp->xdf_vd_lbl); 3392 3393 /* we'll support backend running in domU later */ 3394 #ifdef DOMU_BACKEND 3395 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3396 #endif 3397 3398 list_destroy(&vdp->xdf_vreq_act); 3399 ddi_prop_remove_all(dip); 3400 xdf_kstat_delete(dip); 3401 ddi_remove_softintr(vdp->xdf_softintr_id); 3402 xvdi_remove_xb_watch_handlers(dip); 3403 ddi_set_driver_private(dip, NULL); 3404 cv_destroy(&vdp->xdf_dev_cv); 3405 mutex_destroy(&vdp->xdf_cb_lk); 3406 mutex_destroy(&vdp->xdf_dev_lk); 3407 if (vdp->xdf_cache_flush_block != NULL) 3408 kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); 3409 ddi_soft_state_free(xdf_ssp, instance); 3410 return (DDI_SUCCESS); 3411 } 3412 3413 /* 3414 * Driver linkage structures. 3415 */ 3416 static struct cb_ops xdf_cbops = { 3417 xdf_open, 3418 xdf_close, 3419 xdf_strategy, 3420 nodev, 3421 xdf_dump, 3422 xdf_read, 3423 xdf_write, 3424 xdf_ioctl, 3425 nodev, 3426 nodev, 3427 nodev, 3428 nochpoll, 3429 xdf_prop_op, 3430 NULL, 3431 D_MP | D_NEW | D_64BIT, 3432 CB_REV, 3433 xdf_aread, 3434 xdf_awrite 3435 }; 3436 3437 struct dev_ops xdf_devops = { 3438 DEVO_REV, /* devo_rev */ 3439 0, /* devo_refcnt */ 3440 xdf_getinfo, /* devo_getinfo */ 3441 nulldev, /* devo_identify */ 3442 nulldev, /* devo_probe */ 3443 xdf_attach, /* devo_attach */ 3444 xdf_detach, /* devo_detach */ 3445 nodev, /* devo_reset */ 3446 &xdf_cbops, /* devo_cb_ops */ 3447 NULL, /* devo_bus_ops */ 3448 NULL, /* devo_power */ 3449 ddi_quiesce_not_supported, /* devo_quiesce */ 3450 }; 3451 3452 /* 3453 * Module linkage structures. 3454 */ 3455 static struct modldrv modldrv = { 3456 &mod_driverops, /* Type of module. 
This one is a driver */ 3457 "virtual block driver", /* short description */ 3458 &xdf_devops /* driver specific ops */ 3459 }; 3460 3461 static struct modlinkage xdf_modlinkage = { 3462 MODREV_1, (void *)&modldrv, NULL 3463 }; 3464 3465 /* 3466 * standard module entry points 3467 */ 3468 int 3469 _init(void) 3470 { 3471 int rc; 3472 3473 xdf_major = ddi_name_to_major("xdf"); 3474 if (xdf_major == (major_t)-1) 3475 return (EINVAL); 3476 3477 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3478 return (rc); 3479 3480 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3481 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3482 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3483 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3484 3485 #if defined(XPV_HVM_DRIVER) 3486 xdf_hvm_init(); 3487 #endif /* XPV_HVM_DRIVER */ 3488 3489 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3490 #if defined(XPV_HVM_DRIVER) 3491 xdf_hvm_fini(); 3492 #endif /* XPV_HVM_DRIVER */ 3493 kmem_cache_destroy(xdf_vreq_cache); 3494 kmem_cache_destroy(xdf_gs_cache); 3495 ddi_soft_state_fini(&xdf_ssp); 3496 return (rc); 3497 } 3498 3499 return (rc); 3500 } 3501 3502 int 3503 _fini(void) 3504 { 3505 3506 int err; 3507 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3508 return (err); 3509 3510 #if defined(XPV_HVM_DRIVER) 3511 xdf_hvm_fini(); 3512 #endif /* XPV_HVM_DRIVER */ 3513 3514 kmem_cache_destroy(xdf_vreq_cache); 3515 kmem_cache_destroy(xdf_gs_cache); 3516 ddi_soft_state_fini(&xdf_ssp); 3517 3518 return (0); 3519 } 3520 3521 int 3522 _info(struct modinfo *modinfop) 3523 { 3524 return (mod_info(&xdf_modlinkage, modinfop)); 3525 } 3526
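/*
 * Example (not part of the driver): a minimal user-level sketch showing how
 * the write-cache ioctls handled by xdf_ioctl() above are typically
 * exercised.  The device path is hypothetical and error handling is omitted;
 * DKIOCGETWCE, DKIOCSETWCE and DKIOCFLUSHWRITECACHE come from <sys/dkio.h>.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stropts.h>
 *	#include <unistd.h>
 *
 *	void
 *	wce_example(void)
 *	{
 *		int fd, wce;
 *
 *		fd = open("/dev/rdsk/c0d0s2", O_RDWR);
 *
 *		(void) ioctl(fd, DKIOCGETWCE, &wce);	...wce == 1 if enabled...
 *		wce = 1;
 *		(void) ioctl(fd, DKIOCSETWCE, &wce);	...enable write caching...
 *		(void) ioctl(fd, DKIOCFLUSHWRITECACHE, NULL);
 *
 *		(void) close(fd);
 *	}
 */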