/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports disk device nodes, accepts IO requests from those
 * nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notifications of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
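 *
 * A rough sketch of the resulting state progression (state values are
 * defined in xdf.h; this summary is derived from the code below):
 *
 *	XD_UNKNOWN/XD_CLOSED -- xdf_setstate_init() ----------> XD_INIT
 *	XD_INIT ------------- xdf_setstate_connected() -------> XD_CONNECTED
 *	XD_CONNECTED -- xdf_ready_tq: xdf_setstate_ready() ----> XD_READY
 *
 * xdf_disconnect() can drop us back to XD_UNKNOWN (or XD_CLOSED) from any
 * of these states.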
 */

#include <io/xdf.h>

#include <sys/conf.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#else /* !XPV_HVM_DRIVER */
#include <sys/evtchn_impl.h>
#endif /* !XPV_HVM_DRIVER */
#include <sys/sunndi.h>
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
#include <sys/fdio.h>
#include <sys/cdio.h>

/*
 * DEBUG_EVAL can be used to include debug only statements without
 * having to use '#ifdef DEBUG' statements
 */
#ifdef DEBUG
#define	DEBUG_EVAL(x)	(x)
#else /* !DEBUG */
#define	DEBUG_EVAL(x)
#endif /* !DEBUG */

#define	XDF_DRAIN_MSEC_DELAY	(50*1000)	/* 00.05 sec */
#define	XDF_DRAIN_RETRY_COUNT	200		/* 10.00 sec */
#define	XDF_STATE_TIMEOUT	(30*1000*1000)	/* 30.00 sec */

#define	INVALID_DOMID	((domid_t)-1)
#define	FLUSH_DISKCACHE	0x1
#define	WRITE_BARRIER	0x2
#define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
#define	USE_WRITE_BARRIER(vdp)						\
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp)					\
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
#define	IS_WRITE_BARRIER(vdp, bp)					\
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&			\
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
#define	IS_FLUSH_DISKCACHE(bp)						\
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

#define	VREQ_DONE(vreq)							\
	VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&		\
	    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||		\
	    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))

#define	BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
#define	BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))

extern int do_polled_io;

/* run-time tunables that we don't want the compiler to optimize away */
volatile int xdf_debug = 0;
volatile boolean_t xdf_barrier_flush_disable = B_FALSE;

/* per module globals */
major_t xdf_major;
static void *xdf_ssp;
static kmem_cache_t *xdf_vreq_cache;
static kmem_cache_t *xdf_gs_cache;
static int xdf_maxphys = XB_MAXPHYS;
static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
static int xdf_fbrewrites;	/* flush block re-write count */

/* misc public functions */
int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
int xdf_lb_getinfo(dev_info_t *, int, void *, void *);

/* misc private functions */
static void xdf_io_start(xdf_t *);
static void xdf_devid_setup(xdf_t *);

/* callbacks from common label */
static cmlb_tg_ops_t xdf_lb_ops = {
	TG_DK_OPS_VERSION_1,
	xdf_lb_rdwr,
	xdf_lb_getinfo
};

/*
 * I/O buffer DMA attributes
 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
 */
static ddi_dma_attr_t xb_dma_attr = {
	DMA_ATTR_V0,
	(uint64_t)0,			/* lowest address */
	(uint64_t)0xffffffffffffffff,	/* highest usable address */
	(uint64_t)0xffffff,		/* DMA counter limit max */
	(uint64_t)XB_BSIZE,		/* alignment in bytes */
	XB_BSIZE - 1,			/* bitmap of burst sizes */
	XB_BSIZE,			/* min transfer */
	(uint64_t)XB_MAX_XFER,		/* maximum transfer */
	(uint64_t)PAGEOFFSET,		/* 1 page segment length */
BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 177 XB_BSIZE, /* granularity */ 178 0, /* flags (reserved) */ 179 }; 180 181 static ddi_device_acc_attr_t xc_acc_attr = { 182 DDI_DEVICE_ATTR_V0, 183 DDI_NEVERSWAP_ACC, 184 DDI_STRICTORDER_ACC 185 }; 186 187 static void 188 xdf_timeout_handler(void *arg) 189 { 190 xdf_t *vdp = arg; 191 192 mutex_enter(&vdp->xdf_dev_lk); 193 vdp->xdf_timeout_id = 0; 194 mutex_exit(&vdp->xdf_dev_lk); 195 196 /* new timeout thread could be re-scheduled */ 197 xdf_io_start(vdp); 198 } 199 200 /* 201 * callback func when DMA/GTE resources is available 202 * 203 * Note: we only register one callback function to grant table subsystem 204 * since we only have one 'struct gnttab_free_callback' in xdf_t. 205 */ 206 static void 207 xdf_gncallback(void *arg) 208 { 209 xdf_t *vdp = arg; 210 ASSERT(vdp != NULL); 211 212 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 213 vdp->xdf_addr)); 214 215 ddi_trigger_softintr(vdp->xdf_softintr_id); 216 } 217 218 static int 219 xdf_dmacallback(caddr_t arg) 220 { 221 xdf_gncallback(arg); 222 return (DDI_DMA_CALLBACK_DONE); 223 } 224 225 static ge_slot_t * 226 gs_get(xdf_t *vdp, int isread) 227 { 228 grant_ref_t gh; 229 ge_slot_t *gs; 230 231 /* try to alloc GTEs needed in this slot, first */ 232 if (gnttab_alloc_grant_references( 233 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 234 if (vdp->xdf_gnt_callback.next == NULL) { 235 SETDMACBON(vdp); 236 gnttab_request_free_callback( 237 &vdp->xdf_gnt_callback, 238 xdf_gncallback, 239 (void *)vdp, 240 BLKIF_MAX_SEGMENTS_PER_REQUEST); 241 } 242 return (NULL); 243 } 244 245 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 246 if (gs == NULL) { 247 gnttab_free_grant_references(gh); 248 if (vdp->xdf_timeout_id == 0) 249 /* restart I/O after one second */ 250 vdp->xdf_timeout_id = 251 timeout(xdf_timeout_handler, vdp, hz); 252 return (NULL); 253 } 254 255 /* init gs_slot */ 256 gs->gs_oeid = vdp->xdf_peer; 257 gs->gs_isread = isread; 258 gs->gs_ghead = gh; 259 gs->gs_ngrefs = 0; 260 261 return (gs); 262 } 263 264 static void 265 gs_free(ge_slot_t *gs) 266 { 267 int i; 268 269 /* release all grant table entry resources used in this slot */ 270 for (i = 0; i < gs->gs_ngrefs; i++) 271 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 272 gnttab_free_grant_references(gs->gs_ghead); 273 list_remove(&gs->gs_vreq->v_gs, gs); 274 kmem_cache_free(xdf_gs_cache, gs); 275 } 276 277 static grant_ref_t 278 gs_grant(ge_slot_t *gs, mfn_t mfn) 279 { 280 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); 281 282 ASSERT(gr != -1); 283 ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); 284 gs->gs_ge[gs->gs_ngrefs++] = gr; 285 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 286 287 return (gr); 288 } 289 290 /* 291 * Alloc a vreq for this bp 292 * bp->av_back contains the pointer to the vreq upon return 293 */ 294 static v_req_t * 295 vreq_get(xdf_t *vdp, buf_t *bp) 296 { 297 v_req_t *vreq = NULL; 298 299 ASSERT(BP_VREQ(bp) == NULL); 300 301 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 302 if (vreq == NULL) { 303 if (vdp->xdf_timeout_id == 0) 304 /* restart I/O after one second */ 305 vdp->xdf_timeout_id = 306 timeout(xdf_timeout_handler, vdp, hz); 307 return (NULL); 308 } 309 bzero(vreq, sizeof (v_req_t)); 310 list_create(&vreq->v_gs, sizeof (ge_slot_t), 311 offsetof(ge_slot_t, gs_vreq_link)); 312 vreq->v_buf = bp; 313 vreq->v_status = VREQ_INIT; 314 vreq->v_runq = B_FALSE; 315 BP_VREQ_SET(bp, vreq); 316 /* init of other fields in vreq is 
up to the caller */ 317 318 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 319 320 return (vreq); 321 } 322 323 static void 324 vreq_free(xdf_t *vdp, v_req_t *vreq) 325 { 326 buf_t *bp = vreq->v_buf; 327 328 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 329 ASSERT(BP_VREQ(bp) == vreq); 330 331 list_remove(&vdp->xdf_vreq_act, vreq); 332 333 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 334 goto done; 335 336 switch (vreq->v_status) { 337 case VREQ_DMAWIN_DONE: 338 case VREQ_GS_ALLOCED: 339 case VREQ_DMABUF_BOUND: 340 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 341 /*FALLTHRU*/ 342 case VREQ_DMAMEM_ALLOCED: 343 if (!ALIGNED_XFER(bp)) { 344 ASSERT(vreq->v_abuf != NULL); 345 if (!IS_ERROR(bp) && IS_READ(bp)) 346 bcopy(vreq->v_abuf, bp->b_un.b_addr, 347 bp->b_bcount); 348 ddi_dma_mem_free(&vreq->v_align); 349 } 350 /*FALLTHRU*/ 351 case VREQ_MEMDMAHDL_ALLOCED: 352 if (!ALIGNED_XFER(bp)) 353 ddi_dma_free_handle(&vreq->v_memdmahdl); 354 /*FALLTHRU*/ 355 case VREQ_DMAHDL_ALLOCED: 356 ddi_dma_free_handle(&vreq->v_dmahdl); 357 break; 358 default: 359 break; 360 } 361 done: 362 ASSERT(!vreq->v_runq); 363 list_destroy(&vreq->v_gs); 364 kmem_cache_free(xdf_vreq_cache, vreq); 365 } 366 367 /* 368 * Snarf new data if our flush block was re-written 369 */ 370 static void 371 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 372 { 373 int nblks; 374 boolean_t mapin; 375 376 if (IS_WRITE_BARRIER(vdp, bp)) 377 return; /* write was a flush write */ 378 379 mapin = B_FALSE; 380 nblks = bp->b_bcount >> DEV_BSHIFT; 381 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 382 xdf_fbrewrites++; 383 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 384 mapin = B_TRUE; 385 bp_mapin(bp); 386 } 387 bcopy(bp->b_un.b_addr + 388 ((xdf_flush_block - blkno) << DEV_BSHIFT), 389 vdp->xdf_cache_flush_block, DEV_BSIZE); 390 if (mapin) 391 bp_mapout(bp); 392 } 393 } 394 395 /* 396 * Initalize the DMA and grant table resources for the buf 397 */ 398 static int 399 vreq_setup(xdf_t *vdp, v_req_t *vreq) 400 { 401 int rc; 402 ddi_dma_attr_t dmaattr; 403 uint_t ndcs, ndws; 404 ddi_dma_handle_t dh; 405 ddi_dma_handle_t mdh; 406 ddi_dma_cookie_t dc; 407 ddi_acc_handle_t abh; 408 caddr_t aba; 409 ge_slot_t *gs; 410 size_t bufsz; 411 off_t off; 412 size_t sz; 413 buf_t *bp = vreq->v_buf; 414 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 415 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 416 417 switch (vreq->v_status) { 418 case VREQ_INIT: 419 if (IS_FLUSH_DISKCACHE(bp)) { 420 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 421 DPRINTF(DMA_DBG, ("xdf@%s: " 422 "get ge_slotfailed\n", vdp->xdf_addr)); 423 return (DDI_FAILURE); 424 } 425 vreq->v_blkno = 0; 426 vreq->v_nslots = 1; 427 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 428 vreq->v_status = VREQ_GS_ALLOCED; 429 gs->gs_vreq = vreq; 430 list_insert_head(&vreq->v_gs, gs); 431 return (DDI_SUCCESS); 432 } 433 434 if (IS_WRITE_BARRIER(vdp, bp)) 435 vreq->v_flush_diskcache = WRITE_BARRIER; 436 vreq->v_blkno = bp->b_blkno + 437 (diskaddr_t)(uintptr_t)bp->b_private; 438 /* See if we wrote new data to our flush block */ 439 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 440 check_fbwrite(vdp, bp, vreq->v_blkno); 441 vreq->v_status = VREQ_INIT_DONE; 442 /*FALLTHRU*/ 443 444 case VREQ_INIT_DONE: 445 /* 446 * alloc DMA handle 447 */ 448 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 449 xdf_dmacallback, (caddr_t)vdp, &dh); 450 if (rc != DDI_SUCCESS) { 451 SETDMACBON(vdp); 452 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 453 vdp->xdf_addr)); 454 return (DDI_FAILURE); 455 } 456 457 vreq->v_dmahdl = dh; 458 vreq->v_status = VREQ_DMAHDL_ALLOCED; 459 /*FALLTHRU*/ 460 461 case VREQ_DMAHDL_ALLOCED: 462 /* 463 * alloc dma handle for 512-byte aligned buf 464 */ 465 if (!ALIGNED_XFER(bp)) { 466 /* 467 * XXPV: we need to temporarily enlarge the seg 468 * boundary and s/g length to work round CR6381968 469 */ 470 dmaattr = xb_dma_attr; 471 dmaattr.dma_attr_seg = (uint64_t)-1; 472 dmaattr.dma_attr_sgllen = INT_MAX; 473 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 474 xdf_dmacallback, (caddr_t)vdp, &mdh); 475 if (rc != DDI_SUCCESS) { 476 SETDMACBON(vdp); 477 DPRINTF(DMA_DBG, ("xdf@%s: " 478 "unaligned buf DMAhandle alloc failed\n", 479 vdp->xdf_addr)); 480 return (DDI_FAILURE); 481 } 482 vreq->v_memdmahdl = mdh; 483 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED; 484 } 485 /*FALLTHRU*/ 486 487 case VREQ_MEMDMAHDL_ALLOCED: 488 /* 489 * alloc 512-byte aligned buf 490 */ 491 if (!ALIGNED_XFER(bp)) { 492 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 493 bp_mapin(bp); 494 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 495 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 496 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 497 &aba, &bufsz, &abh); 498 if (rc != DDI_SUCCESS) { 499 SETDMACBON(vdp); 500 DPRINTF(DMA_DBG, ("xdf@%s: " 501 "DMA mem allocation failed\n", 502 vdp->xdf_addr)); 503 return (DDI_FAILURE); 504 } 505 506 vreq->v_abuf = aba; 507 vreq->v_align = abh; 508 vreq->v_status = VREQ_DMAMEM_ALLOCED; 509 510 ASSERT(bufsz >= bp->b_bcount); 511 if (!IS_READ(bp)) 512 bcopy(bp->b_un.b_addr, vreq->v_abuf, 513 bp->b_bcount); 514 } 515 /*FALLTHRU*/ 516 517 case VREQ_DMAMEM_ALLOCED: 518 /* 519 * dma bind 520 */ 521 if (ALIGNED_XFER(bp)) { 522 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 523 dma_flags, xdf_dmacallback, (caddr_t)vdp, 524 &dc, &ndcs); 525 } else { 526 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 527 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 528 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 529 } 530 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 531 /* get num of dma windows */ 532 if (rc == DDI_DMA_PARTIAL_MAP) { 533 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 534 ASSERT(rc == DDI_SUCCESS); 535 } else { 536 ndws = 1; 537 } 538 } else { 539 SETDMACBON(vdp); 540 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 541 vdp->xdf_addr)); 542 return 
(DDI_FAILURE); 543 } 544 545 vreq->v_dmac = dc; 546 vreq->v_dmaw = 0; 547 vreq->v_ndmacs = ndcs; 548 vreq->v_ndmaws = ndws; 549 vreq->v_nslots = ndws; 550 vreq->v_status = VREQ_DMABUF_BOUND; 551 /*FALLTHRU*/ 552 553 case VREQ_DMABUF_BOUND: 554 /* 555 * get ge_slot, callback is set upon failure from gs_get(), 556 * if not set previously 557 */ 558 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 559 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 560 vdp->xdf_addr)); 561 return (DDI_FAILURE); 562 } 563 564 vreq->v_status = VREQ_GS_ALLOCED; 565 gs->gs_vreq = vreq; 566 list_insert_head(&vreq->v_gs, gs); 567 break; 568 569 case VREQ_GS_ALLOCED: 570 /* nothing need to be done */ 571 break; 572 573 case VREQ_DMAWIN_DONE: 574 /* 575 * move to the next dma window 576 */ 577 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 578 579 /* get a ge_slot for this DMA window */ 580 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 581 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 582 vdp->xdf_addr)); 583 return (DDI_FAILURE); 584 } 585 586 vreq->v_dmaw++; 587 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 588 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 589 vreq->v_status = VREQ_GS_ALLOCED; 590 gs->gs_vreq = vreq; 591 list_insert_head(&vreq->v_gs, gs); 592 break; 593 594 default: 595 return (DDI_FAILURE); 596 } 597 598 return (DDI_SUCCESS); 599 } 600 601 static int 602 xdf_cmlb_attach(xdf_t *vdp) 603 { 604 dev_info_t *dip = vdp->xdf_dip; 605 606 return (cmlb_attach(dip, &xdf_lb_ops, 607 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 608 XD_IS_RM(vdp), B_TRUE, 609 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 610 0, vdp->xdf_vd_lbl, NULL)); 611 } 612 613 static void 614 xdf_io_err(buf_t *bp, int err, size_t resid) 615 { 616 bioerror(bp, err); 617 if (resid == 0) 618 bp->b_resid = bp->b_bcount; 619 biodone(bp); 620 } 621 622 static void 623 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 624 { 625 v_req_t *vreq = BP_VREQ(bp); 626 627 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 628 629 if (vdp->xdf_xdev_iostat == NULL) 630 return; 631 if ((vreq != NULL) && vreq->v_runq) { 632 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 633 } else { 634 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 635 } 636 } 637 638 static void 639 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 640 { 641 v_req_t *vreq = BP_VREQ(bp); 642 643 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 644 645 if (vdp->xdf_xdev_iostat == NULL) 646 return; 647 648 if ((vreq != NULL) && vreq->v_runq) { 649 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 650 } else { 651 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 652 } 653 654 if (bp->b_flags & B_READ) { 655 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++; 656 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount; 657 } else if (bp->b_flags & B_WRITE) { 658 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++; 659 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount; 660 } 661 } 662 663 static void 664 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 665 { 666 v_req_t *vreq = BP_VREQ(bp); 667 668 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 669 ASSERT(!vreq->v_runq); 670 671 vreq->v_runq = B_TRUE; 672 if (vdp->xdf_xdev_iostat == NULL) 673 return; 674 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 675 } 676 677 static void 678 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 679 { 680 v_req_t *vreq = BP_VREQ(bp); 681 682 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 683 ASSERT(vreq->v_runq); 684 685 vreq->v_runq = B_FALSE; 686 if (vdp->xdf_xdev_iostat == NULL) 687 return; 688 
kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 689 } 690 691 int 692 xdf_kstat_create(dev_info_t *dip) 693 { 694 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 695 kstat_t *kstat; 696 buf_t *bp; 697 698 if ((kstat = kstat_create("xdf", ddi_get_instance(dip), NULL, "disk", 699 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) 700 return (-1); 701 702 /* See comment about locking in xdf_kstat_delete(). */ 703 mutex_enter(&vdp->xdf_iostat_lk); 704 mutex_enter(&vdp->xdf_dev_lk); 705 706 /* only one kstat can exist at a time */ 707 if (vdp->xdf_xdev_iostat != NULL) { 708 mutex_exit(&vdp->xdf_dev_lk); 709 mutex_exit(&vdp->xdf_iostat_lk); 710 kstat_delete(kstat); 711 return (-1); 712 } 713 714 vdp->xdf_xdev_iostat = kstat; 715 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 716 kstat_install(vdp->xdf_xdev_iostat); 717 718 /* 719 * Now that we've created a kstat, we need to update the waitq and 720 * runq counts for the kstat to reflect our current state. 721 * 722 * For a buf_t structure to be on the runq, it must have a ring 723 * buffer slot associated with it. To get a ring buffer slot the 724 * buf must first have a v_req_t and a ge_slot_t associated with it. 725 * Then when it is granted a ring buffer slot, v_runq will be set to 726 * true. 727 * 728 * For a buf_t structure to be on the waitq, it must not be on the 729 * runq. So to find all the buf_t's that should be on waitq, we 730 * walk the active buf list and add any buf_t's which aren't on the 731 * runq to the waitq. 732 */ 733 bp = vdp->xdf_f_act; 734 while (bp != NULL) { 735 xdf_kstat_enter(vdp, bp); 736 bp = bp->av_forw; 737 } 738 if (vdp->xdf_ready_tq_bp != NULL) 739 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 740 741 mutex_exit(&vdp->xdf_dev_lk); 742 mutex_exit(&vdp->xdf_iostat_lk); 743 return (0); 744 } 745 746 void 747 xdf_kstat_delete(dev_info_t *dip) 748 { 749 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 750 kstat_t *kstat; 751 buf_t *bp; 752 753 /* 754 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 755 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 756 * and the contents of the our kstat. xdf_iostat_lk is used 757 * to protect the allocation and freeing of the actual kstat. 758 * xdf_dev_lk can't be used for this purpose because kstat 759 * readers use it to access the contents of the kstat and 760 * hence it can't be held when calling kstat_delete(). 761 */ 762 mutex_enter(&vdp->xdf_iostat_lk); 763 mutex_enter(&vdp->xdf_dev_lk); 764 765 if (vdp->xdf_xdev_iostat == NULL) { 766 mutex_exit(&vdp->xdf_dev_lk); 767 mutex_exit(&vdp->xdf_iostat_lk); 768 return; 769 } 770 771 /* 772 * We're about to destroy the kstat structures, so it isn't really 773 * necessary to update the runq and waitq counts. But, since this 774 * isn't a hot code path we can afford to be a little pedantic and 775 * go ahead and decrement the runq and waitq kstat counters to zero 776 * before free'ing them. This helps us ensure that we've gotten all 777 * our accounting correct. 778 * 779 * For an explanation of how we determine which buffers go on the 780 * runq vs which go on the waitq, see the comments in 781 * xdf_kstat_create(). 
 */
	bp = vdp->xdf_f_act;
	while (bp != NULL) {
		xdf_kstat_exit(vdp, bp);
		bp = bp->av_forw;
	}
	if (vdp->xdf_ready_tq_bp != NULL)
		xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}

/*
 * Add an IO request onto the active queue.
 *
 * We have to detect IOs generated by xdf_ready_tq_thread.  These IOs
 * are used to establish a connection to the backend, so they receive
 * priority over all other IOs.  Since xdf_ready_tq_thread only does
 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
 * given time and we record the buf associated with that request in
 * xdf_ready_tq_bp.
 */
static void
xdf_bp_push(xdf_t *vdp, buf_t *bp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(bp->av_forw == NULL);

	xdf_kstat_enter(vdp, bp);

	if (curthread == vdp->xdf_ready_tq_thread) {
		/* new IO requests from the ready thread */
		ASSERT(vdp->xdf_ready_tq_bp == NULL);
		vdp->xdf_ready_tq_bp = bp;
		return;
	}

	/* this is a normal IO request */
	ASSERT(bp != vdp->xdf_ready_tq_bp);

	if (vdp->xdf_f_act == NULL) {
		/* this is the only IO on the active queue */
		ASSERT(vdp->xdf_l_act == NULL);
		ASSERT(vdp->xdf_i_act == NULL);
		vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
		return;
	}

	/* add this IO to the tail of the active queue */
	vdp->xdf_l_act->av_forw = bp;
	vdp->xdf_l_act = bp;
	if (vdp->xdf_i_act == NULL)
		vdp->xdf_i_act = bp;
}

static void
xdf_bp_pop(xdf_t *vdp, buf_t *bp)
{
	buf_t *bp_iter;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(VREQ_DONE(BP_VREQ(bp)));

	if (vdp->xdf_ready_tq_bp == bp) {
		/* we're done with a ready thread IO request */
		ASSERT(bp->av_forw == NULL);
		vdp->xdf_ready_tq_bp = NULL;
		return;
	}

	/* we're done with a normal IO request */
	ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
	ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
	ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
	ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);

	if (bp == vdp->xdf_f_act) {
		/* This IO was at the head of our active queue. */
		vdp->xdf_f_act = bp->av_forw;
		if (bp == vdp->xdf_l_act)
			vdp->xdf_l_act = NULL;
	} else {
		/* This IO finished before some other pending IOs. */
		bp_iter = vdp->xdf_f_act;
		while (bp != bp_iter->av_forw) {
			bp_iter = bp_iter->av_forw;
			ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
			ASSERT(bp_iter != vdp->xdf_i_act);
		}
		bp_iter->av_forw = bp->av_forw;
		if (bp == vdp->xdf_l_act)
			vdp->xdf_l_act = bp_iter;
	}
	bp->av_forw = NULL;
}

static buf_t *
xdf_bp_next(xdf_t *vdp)
{
	v_req_t *vreq;
	buf_t *bp;

	if (vdp->xdf_state == XD_CONNECTED) {
		/*
		 * If we're in the XD_CONNECTED state, we only service IOs
		 * from the xdf_ready_tq_thread thread.
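		 * (Those IOs are tracked in xdf_ready_tq_bp rather than on
		 * the normal xdf_f_act/xdf_l_act/xdf_i_act queue; see
		 * xdf_bp_push() above.)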
892 */ 893 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 894 return (NULL); 895 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 896 return (bp); 897 return (NULL); 898 } 899 900 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 901 if (vdp->xdf_state != XD_READY) 902 return (NULL); 903 904 ASSERT(vdp->xdf_ready_tq_bp == NULL); 905 for (;;) { 906 if ((bp = vdp->xdf_i_act) == NULL) 907 return (NULL); 908 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 909 return (bp); 910 911 /* advance the active buf index pointer */ 912 vdp->xdf_i_act = bp->av_forw; 913 } 914 } 915 916 static void 917 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 918 { 919 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 920 v_req_t *vreq = gs->gs_vreq; 921 buf_t *bp = vreq->v_buf; 922 923 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 924 ASSERT(BP_VREQ(bp) == vreq); 925 926 gs_free(gs); 927 928 if (bioerr != 0) 929 bioerror(bp, bioerr); 930 ASSERT(vreq->v_nslots > 0); 931 if (--vreq->v_nslots > 0) 932 return; 933 934 /* remove this IO from our active queue */ 935 xdf_bp_pop(vdp, bp); 936 937 ASSERT(vreq->v_runq); 938 xdf_kstat_exit(vdp, bp); 939 vreq->v_runq = B_FALSE; 940 vreq_free(vdp, vreq); 941 942 if (IS_ERROR(bp)) { 943 xdf_io_err(bp, geterror(bp), 0); 944 } else if (bp->b_resid != 0) { 945 /* Partial transfers are an error */ 946 xdf_io_err(bp, EIO, bp->b_resid); 947 } else { 948 biodone(bp); 949 } 950 } 951 952 /* 953 * xdf interrupt handler 954 */ 955 static uint_t 956 xdf_intr_locked(xdf_t *vdp) 957 { 958 xendev_ring_t *xbr; 959 blkif_response_t *resp; 960 int bioerr; 961 uint64_t id; 962 uint8_t op; 963 uint16_t status; 964 ddi_acc_handle_t acchdl; 965 966 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 967 968 if ((xbr = vdp->xdf_xb_ring) == NULL) 969 return (DDI_INTR_UNCLAIMED); 970 971 acchdl = vdp->xdf_xb_ring_hdl; 972 973 /* 974 * complete all requests which have a response 975 */ 976 while (resp = xvdi_ring_get_response(xbr)) { 977 id = ddi_get64(acchdl, &resp->id); 978 op = ddi_get8(acchdl, &resp->operation); 979 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 980 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 981 op, id, status)); 982 983 if (status != BLKIF_RSP_OKAY) { 984 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 985 vdp->xdf_addr, 986 (op == BLKIF_OP_READ) ? "reading" : "writing")); 987 bioerr = EIO; 988 } else { 989 bioerr = 0; 990 } 991 992 xdf_io_fini(vdp, id, bioerr); 993 } 994 return (DDI_INTR_CLAIMED); 995 } 996 997 /* 998 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 999 * block at a lower pil. 
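 * (This is also why anything that can block on xenbus, such as
 * xdf_media_req(), must not be called while holding xdf_dev_lk; see the
 * comment in xdf_media_req() below.)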
1000 */ 1001 static uint_t 1002 xdf_intr(caddr_t arg) 1003 { 1004 xdf_t *vdp = (xdf_t *)arg; 1005 int rv; 1006 1007 mutex_enter(&vdp->xdf_dev_lk); 1008 rv = xdf_intr_locked(vdp); 1009 mutex_exit(&vdp->xdf_dev_lk); 1010 1011 if (!do_polled_io) 1012 xdf_io_start(vdp); 1013 1014 return (rv); 1015 } 1016 1017 static void 1018 xdf_ring_push(xdf_t *vdp) 1019 { 1020 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1021 1022 if (vdp->xdf_xb_ring == NULL) 1023 return; 1024 1025 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1026 DPRINTF(IO_DBG, ( 1027 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1028 vdp->xdf_addr)); 1029 } 1030 1031 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1032 xvdi_notify_oe(vdp->xdf_dip); 1033 } 1034 1035 static int 1036 xdf_ring_drain_locked(xdf_t *vdp) 1037 { 1038 int pollc, rv = 0; 1039 1040 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1041 1042 if (xdf_debug & SUSRES_DBG) 1043 xen_printf("xdf_ring_drain: start\n"); 1044 1045 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1046 if (vdp->xdf_xb_ring == NULL) 1047 goto out; 1048 1049 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1050 (void) xdf_intr_locked(vdp); 1051 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1052 goto out; 1053 xdf_ring_push(vdp); 1054 1055 /* file-backed devices can be slow */ 1056 mutex_exit(&vdp->xdf_dev_lk); 1057 #ifdef XPV_HVM_DRIVER 1058 (void) HYPERVISOR_yield(); 1059 #endif /* XPV_HVM_DRIVER */ 1060 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1061 mutex_enter(&vdp->xdf_dev_lk); 1062 } 1063 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1064 1065 out: 1066 if (vdp->xdf_xb_ring != NULL) { 1067 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1068 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1069 rv = EIO; 1070 } 1071 if (xdf_debug & SUSRES_DBG) 1072 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1073 vdp->xdf_addr, rv); 1074 return (rv); 1075 } 1076 1077 static int 1078 xdf_ring_drain(xdf_t *vdp) 1079 { 1080 int rv; 1081 mutex_enter(&vdp->xdf_dev_lk); 1082 rv = xdf_ring_drain_locked(vdp); 1083 mutex_exit(&vdp->xdf_dev_lk); 1084 return (rv); 1085 } 1086 1087 /* 1088 * Destroy all v_req_t, grant table entries, and our ring buffer. 1089 */ 1090 static void 1091 xdf_ring_destroy(xdf_t *vdp) 1092 { 1093 v_req_t *vreq; 1094 buf_t *bp; 1095 ge_slot_t *gs; 1096 1097 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1098 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1099 1100 if ((vdp->xdf_state != XD_INIT) && 1101 (vdp->xdf_state != XD_CONNECTED) && 1102 (vdp->xdf_state != XD_READY)) { 1103 ASSERT(vdp->xdf_xb_ring == NULL); 1104 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1105 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1106 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1107 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1108 return; 1109 } 1110 1111 /* 1112 * We don't want to receive async notifications from the backend 1113 * when it finishes processing ring entries. 1114 */ 1115 #ifdef XPV_HVM_DRIVER 1116 ec_unbind_evtchn(vdp->xdf_evtchn); 1117 #else /* !XPV_HVM_DRIVER */ 1118 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1119 #endif /* !XPV_HVM_DRIVER */ 1120 1121 /* 1122 * Drain any requests in the ring. We need to do this before we 1123 * can free grant table entries, because if active ring entries 1124 * point to grants, then the backend could be trying to access 1125 * those grants. 
1126 */ 1127 (void) xdf_ring_drain_locked(vdp); 1128 1129 /* We're done talking to the backend so free up our event channel */ 1130 xvdi_free_evtchn(vdp->xdf_dip); 1131 vdp->xdf_evtchn = INVALID_EVTCHN; 1132 1133 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1134 bp = vreq->v_buf; 1135 ASSERT(BP_VREQ(bp) == vreq); 1136 1137 /* Free up any grant table entries associaed with this IO */ 1138 while ((gs = list_head(&vreq->v_gs)) != NULL) 1139 gs_free(gs); 1140 1141 /* If this IO was on the runq, move it back to the waitq. */ 1142 if (vreq->v_runq) 1143 xdf_kstat_runq_to_waitq(vdp, bp); 1144 1145 /* 1146 * Reset any buf IO state since we're going to re-issue the 1147 * IO when we reconnect. 1148 */ 1149 vreq_free(vdp, vreq); 1150 BP_VREQ_SET(bp, NULL); 1151 bioerror(bp, 0); 1152 } 1153 1154 /* reset the active queue index pointer */ 1155 vdp->xdf_i_act = vdp->xdf_f_act; 1156 1157 /* Destroy the ring */ 1158 xvdi_free_ring(vdp->xdf_xb_ring); 1159 vdp->xdf_xb_ring = NULL; 1160 vdp->xdf_xb_ring_hdl = NULL; 1161 vdp->xdf_peer = INVALID_DOMID; 1162 } 1163 1164 void 1165 xdfmin(struct buf *bp) 1166 { 1167 if (bp->b_bcount > xdf_maxphys) 1168 bp->b_bcount = xdf_maxphys; 1169 } 1170 1171 /* 1172 * Check if we have a pending "eject" media request. 1173 */ 1174 static int 1175 xdf_eject_pending(xdf_t *vdp) 1176 { 1177 dev_info_t *dip = vdp->xdf_dip; 1178 char *xsname, *str; 1179 1180 if (!vdp->xdf_media_req_supported) 1181 return (B_FALSE); 1182 1183 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1184 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1185 return (B_FALSE); 1186 1187 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1188 strfree(str); 1189 return (B_FALSE); 1190 } 1191 strfree(str); 1192 return (B_TRUE); 1193 } 1194 1195 /* 1196 * Generate a media request. 1197 */ 1198 static int 1199 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1200 { 1201 dev_info_t *dip = vdp->xdf_dip; 1202 char *xsname; 1203 1204 /* 1205 * we can't be holding xdf_dev_lk because xenbus_printf() can 1206 * block while waiting for a PIL 1 interrupt message. this 1207 * would cause a deadlock with xdf_intr() which needs to grab 1208 * xdf_dev_lk as well and runs at PIL 5. 
1209 */ 1210 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1211 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1212 1213 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1214 return (ENXIO); 1215 1216 /* Check if we support media requests */ 1217 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1218 return (ENOTTY); 1219 1220 /* If an eject is pending then don't allow any new requests */ 1221 if (xdf_eject_pending(vdp)) 1222 return (ENXIO); 1223 1224 /* Make sure that there is media present */ 1225 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1226 return (ENXIO); 1227 1228 /* We only allow operations when the device is ready and connected */ 1229 if (vdp->xdf_state != XD_READY) 1230 return (EIO); 1231 1232 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1233 return (EIO); 1234 1235 return (0); 1236 } 1237 1238 /* 1239 * populate a single blkif_request_t w/ a buf 1240 */ 1241 static void 1242 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1243 { 1244 grant_ref_t gr; 1245 uint8_t fsect, lsect; 1246 size_t bcnt; 1247 paddr_t dma_addr; 1248 off_t blk_off; 1249 dev_info_t *dip = vdp->xdf_dip; 1250 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1251 v_req_t *vreq = BP_VREQ(bp); 1252 uint64_t blkno = vreq->v_blkno; 1253 uint_t ndmacs = vreq->v_ndmacs; 1254 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1255 int seg = 0; 1256 int isread = IS_READ(bp); 1257 ge_slot_t *gs = list_head(&vreq->v_gs); 1258 1259 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1260 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1261 1262 if (isread) 1263 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1264 else { 1265 switch (vreq->v_flush_diskcache) { 1266 case FLUSH_DISKCACHE: 1267 ddi_put8(acchdl, &rreq->operation, 1268 BLKIF_OP_FLUSH_DISKCACHE); 1269 ddi_put16(acchdl, &rreq->handle, vdev); 1270 ddi_put64(acchdl, &rreq->id, 1271 (uint64_t)(uintptr_t)(gs)); 1272 ddi_put8(acchdl, &rreq->nr_segments, 0); 1273 vreq->v_status = VREQ_DMAWIN_DONE; 1274 return; 1275 case WRITE_BARRIER: 1276 ddi_put8(acchdl, &rreq->operation, 1277 BLKIF_OP_WRITE_BARRIER); 1278 break; 1279 default: 1280 if (!vdp->xdf_wce) 1281 ddi_put8(acchdl, &rreq->operation, 1282 BLKIF_OP_WRITE_BARRIER); 1283 else 1284 ddi_put8(acchdl, &rreq->operation, 1285 BLKIF_OP_WRITE); 1286 break; 1287 } 1288 } 1289 1290 ddi_put16(acchdl, &rreq->handle, vdev); 1291 ddi_put64(acchdl, &rreq->sector_number, blkno); 1292 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1293 1294 /* 1295 * loop until all segments are populated or no more dma cookie in buf 1296 */ 1297 for (;;) { 1298 /* 1299 * Each segment of a blkif request can transfer up to 1300 * one 4K page of data. 
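		 * Each segment is described below by a grant reference for
		 * the underlying page plus first/last sector offsets (in
		 * XB_BSIZE units) within that page, taken from the current
		 * DMA cookie.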
1301 */ 1302 bcnt = vreq->v_dmac.dmac_size; 1303 dma_addr = vreq->v_dmac.dmac_laddress; 1304 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1305 fsect = blk_off >> XB_BSHIFT; 1306 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1307 1308 ASSERT(bcnt <= PAGESIZE); 1309 ASSERT((bcnt % XB_BSIZE) == 0); 1310 ASSERT((blk_off & XB_BMASK) == 0); 1311 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1312 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1313 1314 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1315 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1316 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1317 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1318 1319 DPRINTF(IO_DBG, ( 1320 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1321 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1322 DPRINTF(IO_DBG, ( 1323 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1324 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1325 1326 blkno += (bcnt >> XB_BSHIFT); 1327 seg++; 1328 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1329 if (--ndmacs) { 1330 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1331 continue; 1332 } 1333 1334 vreq->v_status = VREQ_DMAWIN_DONE; 1335 vreq->v_blkno = blkno; 1336 break; 1337 } 1338 ddi_put8(acchdl, &rreq->nr_segments, seg); 1339 DPRINTF(IO_DBG, ( 1340 "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n", 1341 vdp->xdf_addr, rreq->id)); 1342 } 1343 1344 static void 1345 xdf_io_start(xdf_t *vdp) 1346 { 1347 struct buf *bp; 1348 v_req_t *vreq; 1349 blkif_request_t *rreq; 1350 boolean_t rreqready = B_FALSE; 1351 1352 mutex_enter(&vdp->xdf_dev_lk); 1353 1354 /* 1355 * Populate the ring request(s). Loop until there is no buf to 1356 * transfer or no free slot available in I/O ring. 1357 */ 1358 for (;;) { 1359 /* don't start any new IO if we're suspending */ 1360 if (vdp->xdf_suspending) 1361 break; 1362 if ((bp = xdf_bp_next(vdp)) == NULL) 1363 break; 1364 1365 /* if the buf doesn't already have a vreq, allocate one */ 1366 if (((vreq = BP_VREQ(bp)) == NULL) && 1367 ((vreq = vreq_get(vdp, bp)) == NULL)) 1368 break; 1369 1370 /* alloc DMA/GTE resources */ 1371 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1372 break; 1373 1374 /* get next blkif_request in the ring */ 1375 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1376 break; 1377 bzero(rreq, sizeof (blkif_request_t)); 1378 rreqready = B_TRUE; 1379 1380 /* populate blkif_request with this buf */ 1381 xdf_process_rreq(vdp, bp, rreq); 1382 1383 /* 1384 * This buffer/vreq pair is has been allocated a ring buffer 1385 * resources, so if it isn't already in our runq, add it. 
1386 */ 1387 if (!vreq->v_runq) 1388 xdf_kstat_waitq_to_runq(vdp, bp); 1389 } 1390 1391 /* Send the request(s) to the backend */ 1392 if (rreqready) 1393 xdf_ring_push(vdp); 1394 1395 mutex_exit(&vdp->xdf_dev_lk); 1396 } 1397 1398 1399 /* check if partition is open, -1 - check all partitions on the disk */ 1400 static boolean_t 1401 xdf_isopen(xdf_t *vdp, int partition) 1402 { 1403 int i; 1404 ulong_t parbit; 1405 boolean_t rval = B_FALSE; 1406 1407 ASSERT((partition == -1) || 1408 ((partition >= 0) || (partition < XDF_PEXT))); 1409 1410 if (partition == -1) 1411 parbit = (ulong_t)-1; 1412 else 1413 parbit = 1 << partition; 1414 1415 for (i = 0; i < OTYPCNT; i++) { 1416 if (vdp->xdf_vd_open[i] & parbit) 1417 rval = B_TRUE; 1418 } 1419 1420 return (rval); 1421 } 1422 1423 /* 1424 * The connection should never be closed as long as someone is holding 1425 * us open, there is pending IO, or someone is waiting waiting for a 1426 * connection. 1427 */ 1428 static boolean_t 1429 xdf_busy(xdf_t *vdp) 1430 { 1431 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1432 1433 if ((vdp->xdf_xb_ring != NULL) && 1434 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1435 ASSERT(vdp->xdf_state != XD_CLOSED); 1436 return (B_TRUE); 1437 } 1438 1439 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1440 ASSERT(vdp->xdf_state != XD_CLOSED); 1441 return (B_TRUE); 1442 } 1443 1444 if (xdf_isopen(vdp, -1)) { 1445 ASSERT(vdp->xdf_state != XD_CLOSED); 1446 return (B_TRUE); 1447 } 1448 1449 if (vdp->xdf_connect_req > 0) { 1450 ASSERT(vdp->xdf_state != XD_CLOSED); 1451 return (B_TRUE); 1452 } 1453 1454 return (B_FALSE); 1455 } 1456 1457 static void 1458 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1459 { 1460 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1461 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1462 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1463 vdp->xdf_addr, vdp->xdf_state, new_state)); 1464 vdp->xdf_state = new_state; 1465 cv_broadcast(&vdp->xdf_dev_cv); 1466 } 1467 1468 static void 1469 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1470 { 1471 dev_info_t *dip = vdp->xdf_dip; 1472 boolean_t busy; 1473 1474 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1475 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1476 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1477 1478 /* Check if we're already there. */ 1479 if (vdp->xdf_state == new_state) 1480 return; 1481 1482 mutex_enter(&vdp->xdf_dev_lk); 1483 busy = xdf_busy(vdp); 1484 1485 /* If we're already closed then there's nothing todo. */ 1486 if (vdp->xdf_state == XD_CLOSED) { 1487 ASSERT(!busy); 1488 xdf_set_state(vdp, new_state); 1489 mutex_exit(&vdp->xdf_dev_lk); 1490 return; 1491 } 1492 1493 #ifdef DEBUG 1494 /* UhOh. Warn the user that something bad has happened. */ 1495 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1496 (vdp->xdf_xdev_nblocks != 0)) { 1497 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1498 vdp->xdf_addr); 1499 } 1500 #endif /* DEBUG */ 1501 1502 xdf_ring_destroy(vdp); 1503 1504 /* If we're busy then we can only go into the unknown state */ 1505 xdf_set_state(vdp, (busy) ? 
XD_UNKNOWN : new_state); 1506 mutex_exit(&vdp->xdf_dev_lk); 1507 1508 /* if we're closed now, let the other end know */ 1509 if (vdp->xdf_state == XD_CLOSED) 1510 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1511 } 1512 1513 1514 /* 1515 * Kick-off connect process 1516 * Status should be XD_UNKNOWN or XD_CLOSED 1517 * On success, status will be changed to XD_INIT 1518 * On error, it will be changed to XD_UNKNOWN 1519 */ 1520 static int 1521 xdf_setstate_init(xdf_t *vdp) 1522 { 1523 dev_info_t *dip = vdp->xdf_dip; 1524 xenbus_transaction_t xbt; 1525 grant_ref_t gref; 1526 char *xsname, *str; 1527 int rv; 1528 1529 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1530 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1531 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1532 (vdp->xdf_state == XD_CLOSED)); 1533 1534 DPRINTF(DDI_DBG, 1535 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1536 1537 /* 1538 * If an eject is pending then don't allow a new connection. 1539 * (Only the backend can clear media request eject request.) 1540 */ 1541 if (xdf_eject_pending(vdp)) 1542 return (DDI_FAILURE); 1543 1544 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1545 goto errout; 1546 1547 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1548 goto errout; 1549 1550 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1551 1552 /* 1553 * Sanity check for the existance of the xenbus device-type property. 1554 * This property might not exist if our xenbus device nodes were 1555 * force destroyed while we were still connected to the backend. 1556 */ 1557 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1558 goto errout; 1559 strfree(str); 1560 1561 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1562 goto errout; 1563 1564 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1565 #ifdef XPV_HVM_DRIVER 1566 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1567 #else /* !XPV_HVM_DRIVER */ 1568 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1569 DDI_SUCCESS) { 1570 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1571 "failed to add intr handler", vdp->xdf_addr); 1572 goto errout1; 1573 } 1574 #endif /* !XPV_HVM_DRIVER */ 1575 1576 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1577 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1578 DDI_SUCCESS) { 1579 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1580 vdp->xdf_addr); 1581 goto errout2; 1582 } 1583 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1584 1585 /* 1586 * Write into xenstore the info needed by backend 1587 */ 1588 trans_retry: 1589 if (xenbus_transaction_start(&xbt)) { 1590 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1591 vdp->xdf_addr); 1592 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1593 goto fail_trans; 1594 } 1595 1596 /* 1597 * XBP_PROTOCOL is written by the domain builder in the case of PV 1598 * domains. However, it is not written for HVM domains, so let's 1599 * write it here. 
1600 */ 1601 if (((rv = xenbus_printf(xbt, xsname, 1602 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1603 ((rv = xenbus_printf(xbt, xsname, 1604 XBP_RING_REF, "%u", gref)) != 0) || 1605 ((rv = xenbus_printf(xbt, xsname, 1606 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1607 ((rv = xenbus_printf(xbt, xsname, 1608 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1609 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1610 (void) xenbus_transaction_end(xbt, 1); 1611 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1612 goto fail_trans; 1613 } 1614 1615 /* kick-off connect process */ 1616 if (rv = xenbus_transaction_end(xbt, 0)) { 1617 if (rv == EAGAIN) 1618 goto trans_retry; 1619 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1620 goto fail_trans; 1621 } 1622 1623 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1624 mutex_enter(&vdp->xdf_dev_lk); 1625 xdf_set_state(vdp, XD_INIT); 1626 mutex_exit(&vdp->xdf_dev_lk); 1627 1628 return (DDI_SUCCESS); 1629 1630 fail_trans: 1631 xvdi_free_ring(vdp->xdf_xb_ring); 1632 errout2: 1633 #ifdef XPV_HVM_DRIVER 1634 ec_unbind_evtchn(vdp->xdf_evtchn); 1635 #else /* !XPV_HVM_DRIVER */ 1636 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1637 #endif /* !XPV_HVM_DRIVER */ 1638 errout1: 1639 xvdi_free_evtchn(dip); 1640 vdp->xdf_evtchn = INVALID_EVTCHN; 1641 errout: 1642 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1643 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1644 vdp->xdf_addr); 1645 return (DDI_FAILURE); 1646 } 1647 1648 int 1649 xdf_get_flush_block(xdf_t *vdp) 1650 { 1651 /* 1652 * Get a DEV_BSIZE aligned bufer 1653 */ 1654 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1655 vdp->xdf_cache_flush_block = 1656 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1657 (int)vdp->xdf_xdev_secsize); 1658 1659 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1660 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1661 return (DDI_FAILURE); 1662 return (DDI_SUCCESS); 1663 } 1664 1665 static void 1666 xdf_setstate_ready(void *arg) 1667 { 1668 xdf_t *vdp = (xdf_t *)arg; 1669 dev_info_t *dip = vdp->xdf_dip; 1670 1671 vdp->xdf_ready_tq_thread = curthread; 1672 1673 /* Create minor nodes now when we are almost connected */ 1674 mutex_enter(&vdp->xdf_dev_lk); 1675 if (vdp->xdf_cmlb_reattach) { 1676 vdp->xdf_cmlb_reattach = B_FALSE; 1677 mutex_exit(&vdp->xdf_dev_lk); 1678 if (xdf_cmlb_attach(vdp) != 0) { 1679 cmn_err(CE_WARN, 1680 "xdf@%s: cmlb attach failed", 1681 ddi_get_name_addr(dip)); 1682 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1683 return; 1684 } 1685 mutex_enter(&vdp->xdf_dev_lk); 1686 } 1687 1688 /* If we're not still trying to get to the ready state, then bail. */ 1689 if (vdp->xdf_state != XD_CONNECTED) { 1690 mutex_exit(&vdp->xdf_dev_lk); 1691 return; 1692 } 1693 mutex_exit(&vdp->xdf_dev_lk); 1694 1695 /* 1696 * If backend has feature-barrier, see if it supports disk 1697 * cache flush op. 1698 */ 1699 vdp->xdf_flush_supported = B_FALSE; 1700 if (vdp->xdf_feature_barrier) { 1701 /* 1702 * Pretend we already know flush is supported so probe 1703 * will attempt the correct op. 1704 */ 1705 vdp->xdf_flush_supported = B_TRUE; 1706 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1707 vdp->xdf_flush_supported = B_TRUE; 1708 } else { 1709 vdp->xdf_flush_supported = B_FALSE; 1710 /* 1711 * If the other end does not support the cache flush op 1712 * then we must use a barrier-write to force disk 1713 * cache flushing. 
Barrier writes require that a data 1714 * block actually be written. 1715 * Cache a block to barrier-write when we are 1716 * asked to perform a flush. 1717 * XXX - would it be better to just copy 1 block 1718 * (512 bytes) from whatever write we did last 1719 * and rewrite that block? 1720 */ 1721 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1722 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1723 return; 1724 } 1725 } 1726 } 1727 1728 mutex_enter(&vdp->xdf_cb_lk); 1729 mutex_enter(&vdp->xdf_dev_lk); 1730 if (vdp->xdf_state == XD_CONNECTED) 1731 xdf_set_state(vdp, XD_READY); 1732 mutex_exit(&vdp->xdf_dev_lk); 1733 1734 /* Restart any currently queued up io */ 1735 xdf_io_start(vdp); 1736 1737 mutex_exit(&vdp->xdf_cb_lk); 1738 } 1739 1740 /* 1741 * synthetic geometry 1742 */ 1743 #define XDF_NSECTS 256 1744 #define XDF_NHEADS 16 1745 1746 static void 1747 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1748 { 1749 xdf_t *vdp; 1750 uint_t ncyl; 1751 1752 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1753 1754 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1755 1756 bzero(geomp, sizeof (*geomp)); 1757 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1758 geomp->g_acyl = 0; 1759 geomp->g_nhead = XDF_NHEADS; 1760 geomp->g_nsect = XDF_NSECTS; 1761 geomp->g_secsize = vdp->xdf_xdev_secsize; 1762 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1763 geomp->g_intrlv = 0; 1764 geomp->g_rpm = 7200; 1765 } 1766 1767 /* 1768 * Finish other initialization after we've connected to backend 1769 * Status should be XD_INIT before calling this routine 1770 * On success, status should be changed to XD_CONNECTED. 1771 * On error, status should stay XD_INIT 1772 */ 1773 static int 1774 xdf_setstate_connected(xdf_t *vdp) 1775 { 1776 dev_info_t *dip = vdp->xdf_dip; 1777 cmlb_geom_t pgeom; 1778 diskaddr_t nblocks = 0; 1779 uint_t secsize = 0; 1780 char *oename, *xsname, *str; 1781 uint_t dinfo; 1782 1783 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1784 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1785 ASSERT(vdp->xdf_state == XD_INIT); 1786 1787 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1788 ((oename = xvdi_get_oename(dip)) == NULL)) 1789 return (DDI_FAILURE); 1790 1791 /* Make sure the other end is XenbusStateConnected */ 1792 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1793 return (DDI_FAILURE); 1794 1795 /* Determine if feature barrier is supported by backend */ 1796 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1797 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1798 vdp->xdf_addr); 1799 1800 /* 1801 * Probe backend. Read the device size into xdf_xdev_nblocks 1802 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1803 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1804 * we always set VDISK_CDROM, regardless of if it's present in 1805 * the xenbus info parameter. 
1806 */ 1807 if (xenbus_gather(XBT_NULL, oename, 1808 XBP_SECTORS, "%"SCNu64, &nblocks, 1809 XBP_SECTOR_SIZE, "%u", &secsize, 1810 XBP_INFO, "%u", &dinfo, 1811 NULL) != 0) { 1812 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1813 "cannot read backend info", vdp->xdf_addr); 1814 return (DDI_FAILURE); 1815 } 1816 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1817 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1818 vdp->xdf_addr); 1819 return (DDI_FAILURE); 1820 } 1821 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1822 dinfo |= VDISK_CDROM; 1823 strfree(str); 1824 1825 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1826 secsize = DEV_BSIZE; 1827 vdp->xdf_xdev_nblocks = nblocks; 1828 vdp->xdf_xdev_secsize = secsize; 1829 #ifdef _ILP32 1830 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1831 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1832 "backend disk device too large with %llu blocks for" 1833 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1834 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1835 return (DDI_FAILURE); 1836 } 1837 #endif 1838 1839 /* 1840 * If the physical geometry for a fixed disk has been explicity 1841 * set then make sure that the specified physical geometry isn't 1842 * larger than the device we connected to. 1843 */ 1844 if (vdp->xdf_pgeom_fixed && 1845 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1846 cmn_err(CE_WARN, 1847 "xdf@%s: connect failed, fixed geometry too large", 1848 vdp->xdf_addr); 1849 return (DDI_FAILURE); 1850 } 1851 1852 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1853 1854 /* mark vbd is ready for I/O */ 1855 mutex_enter(&vdp->xdf_dev_lk); 1856 xdf_set_state(vdp, XD_CONNECTED); 1857 1858 /* check if the cmlb label should be updated */ 1859 xdf_synthetic_pgeom(dip, &pgeom); 1860 if ((vdp->xdf_dinfo != dinfo) || 1861 (!vdp->xdf_pgeom_fixed && 1862 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1863 vdp->xdf_cmlb_reattach = B_TRUE; 1864 1865 vdp->xdf_dinfo = dinfo; 1866 if (!vdp->xdf_pgeom_fixed) 1867 vdp->xdf_pgeom = pgeom; 1868 } 1869 1870 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1871 if (vdp->xdf_xdev_nblocks == 0) { 1872 vdp->xdf_mstate = DKIO_EJECTED; 1873 cv_broadcast(&vdp->xdf_mstate_cv); 1874 } else { 1875 vdp->xdf_mstate = DKIO_INSERTED; 1876 cv_broadcast(&vdp->xdf_mstate_cv); 1877 } 1878 } else { 1879 if (vdp->xdf_mstate != DKIO_NONE) { 1880 vdp->xdf_mstate = DKIO_NONE; 1881 cv_broadcast(&vdp->xdf_mstate_cv); 1882 } 1883 } 1884 1885 mutex_exit(&vdp->xdf_dev_lk); 1886 1887 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1888 (uint64_t)vdp->xdf_xdev_nblocks); 1889 1890 /* Restart any currently queued up io */ 1891 xdf_io_start(vdp); 1892 1893 /* 1894 * To get to the ready state we have to do IO to the backend device, 1895 * but we can't initiate IO from the other end change callback thread 1896 * (which is the current context we're executing in.) This is because 1897 * if the other end disconnects while we're doing IO from the callback 1898 * thread, then we can't receive that disconnect event and we hang 1899 * waiting for an IO that can never complete. 
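	 * (Hence the IO is driven from xdf_setstate_ready() on the
	 * xdf_ready_tq taskq, dispatched below.)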
1900 */ 1901 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1902 DDI_SLEEP); 1903 1904 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1905 return (DDI_SUCCESS); 1906 } 1907 1908 /*ARGSUSED*/ 1909 static void 1910 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1911 { 1912 XenbusState new_state = *(XenbusState *)impl_data; 1913 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1914 1915 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1916 vdp->xdf_addr, new_state)); 1917 1918 mutex_enter(&vdp->xdf_cb_lk); 1919 1920 /* We assume that this callback is single threaded */ 1921 ASSERT(vdp->xdf_oe_change_thread == NULL); 1922 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1923 1924 /* ignore any backend state changes if we're suspending/suspended */ 1925 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1926 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1927 mutex_exit(&vdp->xdf_cb_lk); 1928 return; 1929 } 1930 1931 switch (new_state) { 1932 case XenbusStateUnknown: 1933 case XenbusStateInitialising: 1934 case XenbusStateInitWait: 1935 case XenbusStateInitialised: 1936 if (vdp->xdf_state == XD_INIT) 1937 break; 1938 1939 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1940 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1941 break; 1942 ASSERT(vdp->xdf_state == XD_INIT); 1943 break; 1944 1945 case XenbusStateConnected: 1946 if ((vdp->xdf_state == XD_CONNECTED) || 1947 (vdp->xdf_state == XD_READY)) 1948 break; 1949 1950 if (vdp->xdf_state != XD_INIT) { 1951 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1952 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1953 break; 1954 ASSERT(vdp->xdf_state == XD_INIT); 1955 } 1956 1957 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1958 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1959 break; 1960 } 1961 ASSERT(vdp->xdf_state == XD_CONNECTED); 1962 break; 1963 1964 case XenbusStateClosing: 1965 if (xdf_isopen(vdp, -1)) { 1966 cmn_err(CE_NOTE, 1967 "xdf@%s: hot-unplug failed, still in use", 1968 vdp->xdf_addr); 1969 break; 1970 } 1971 /*FALLTHROUGH*/ 1972 case XenbusStateClosed: 1973 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1974 break; 1975 } 1976 1977 /* notify anybody waiting for oe state change */ 1978 cv_broadcast(&vdp->xdf_dev_cv); 1979 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1980 mutex_exit(&vdp->xdf_cb_lk); 1981 } 1982 1983 static int 1984 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1985 { 1986 int rv, timeouts = 0, reset = 20; 1987 1988 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1989 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1990 1991 /* we can't connect once we're in the closed state */ 1992 if (vdp->xdf_state == XD_CLOSED) 1993 return (XD_CLOSED); 1994 1995 vdp->xdf_connect_req++; 1996 while (vdp->xdf_state != XD_READY) { 1997 mutex_exit(&vdp->xdf_dev_lk); 1998 1999 /* only one thread at a time can be the connection thread */ 2000 if (vdp->xdf_connect_thread == NULL) 2001 vdp->xdf_connect_thread = curthread; 2002 2003 if (vdp->xdf_connect_thread == curthread) { 2004 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 2005 /* 2006 * If we haven't establised a connection 2007 * within the reset time, then disconnect 2008 * so we can try again, and double the reset 2009 * time. The reset time starts at 2 sec. 
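				 * ("timeouts" counts the 0.1 sec cv timeouts
				 * taken below, so the initial reset value of
				 * 20 corresponds to 2 seconds.)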
2010 */ 2011 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2012 reset *= 2; 2013 } 2014 if (vdp->xdf_state == XD_UNKNOWN) 2015 (void) xdf_setstate_init(vdp); 2016 if (vdp->xdf_state == XD_INIT) 2017 (void) xdf_setstate_connected(vdp); 2018 } 2019 2020 mutex_enter(&vdp->xdf_dev_lk); 2021 if (!wait || (vdp->xdf_state == XD_READY)) 2022 goto out; 2023 2024 mutex_exit((&vdp->xdf_cb_lk)); 2025 if (vdp->xdf_connect_thread != curthread) { 2026 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2027 } else { 2028 /* delay for 0.1 sec */ 2029 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv, 2030 &vdp->xdf_dev_lk, drv_usectohz(100*1000), 2031 TR_CLOCK_TICK); 2032 if (rv == -1) 2033 timeouts++; 2034 } 2035 mutex_exit((&vdp->xdf_dev_lk)); 2036 mutex_enter((&vdp->xdf_cb_lk)); 2037 mutex_enter((&vdp->xdf_dev_lk)); 2038 if (rv == 0) 2039 goto out; 2040 } 2041 2042 out: 2043 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2044 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2045 2046 if (vdp->xdf_connect_thread == curthread) { 2047 /* 2048 * wake up someone else so they can become the connection 2049 * thread. 2050 */ 2051 cv_signal(&vdp->xdf_dev_cv); 2052 vdp->xdf_connect_thread = NULL; 2053 } 2054 2055 /* Try to lock the media */ 2056 mutex_exit((&vdp->xdf_dev_lk)); 2057 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2058 mutex_enter((&vdp->xdf_dev_lk)); 2059 2060 vdp->xdf_connect_req--; 2061 return (vdp->xdf_state); 2062 } 2063 2064 static uint_t 2065 xdf_iorestart(caddr_t arg) 2066 { 2067 xdf_t *vdp = (xdf_t *)arg; 2068 2069 ASSERT(vdp != NULL); 2070 2071 mutex_enter(&vdp->xdf_dev_lk); 2072 ASSERT(ISDMACBON(vdp)); 2073 SETDMACBOFF(vdp); 2074 mutex_exit(&vdp->xdf_dev_lk); 2075 2076 xdf_io_start(vdp); 2077 2078 return (DDI_INTR_CLAIMED); 2079 } 2080 2081 #ifdef XPV_HVM_DRIVER 2082 2083 typedef struct xdf_hvm_entry { 2084 list_node_t xdf_he_list; 2085 char *xdf_he_path; 2086 dev_info_t *xdf_he_dip; 2087 } xdf_hvm_entry_t; 2088 2089 static list_t xdf_hvm_list; 2090 static kmutex_t xdf_hvm_list_lock; 2091 2092 static xdf_hvm_entry_t * 2093 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2094 { 2095 xdf_hvm_entry_t *i; 2096 2097 ASSERT((path != NULL) || (dip != NULL)); 2098 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2099 2100 i = list_head(&xdf_hvm_list); 2101 while (i != NULL) { 2102 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2103 i = list_next(&xdf_hvm_list, i); 2104 continue; 2105 } 2106 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2107 i = list_next(&xdf_hvm_list, i); 2108 continue; 2109 } 2110 break; 2111 } 2112 return (i); 2113 } 2114 2115 dev_info_t * 2116 xdf_hvm_hold(const char *path) 2117 { 2118 xdf_hvm_entry_t *i; 2119 dev_info_t *dip; 2120 2121 mutex_enter(&xdf_hvm_list_lock); 2122 i = i_xdf_hvm_find(path, NULL); 2123 if (i == NULL) { 2124 mutex_exit(&xdf_hvm_list_lock); 2125 return (B_FALSE); 2126 } 2127 ndi_hold_devi(dip = i->xdf_he_dip); 2128 mutex_exit(&xdf_hvm_list_lock); 2129 return (dip); 2130 } 2131 2132 static void 2133 xdf_hvm_add(dev_info_t *dip) 2134 { 2135 xdf_hvm_entry_t *i; 2136 char *path; 2137 2138 /* figure out the path for the dip */ 2139 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2140 (void) ddi_pathname(dip, path); 2141 2142 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2143 i->xdf_he_dip = dip; 2144 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2145 2146 mutex_enter(&xdf_hvm_list_lock); 2147 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2148 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2149 list_insert_head(&xdf_hvm_list, i); 2150 mutex_exit(&xdf_hvm_list_lock); 2151 2152 
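	/*
	 * The list entry keeps its own copy of the pathname (duplicated
	 * with i_ddi_strdup() above), so the scratch buffer can be freed.
	 */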
	kmem_free(path, MAXPATHLEN);
}

static void
xdf_hvm_rm(dev_info_t *dip)
{
	xdf_hvm_entry_t	*i;

	mutex_enter(&xdf_hvm_list_lock);
	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
	list_remove(&xdf_hvm_list, i);
	mutex_exit(&xdf_hvm_list_lock);

	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
	kmem_free(i, sizeof (*i));
}

static void
xdf_hvm_init(void)
{
	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
	    offsetof(xdf_hvm_entry_t, xdf_he_list));
	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}

static void
xdf_hvm_fini(void)
{
	ASSERT(list_head(&xdf_hvm_list) == NULL);
	list_destroy(&xdf_hvm_list);
	mutex_destroy(&xdf_hvm_list_lock);
}

boolean_t
xdf_hvm_connect(dev_info_t *dip)
{
	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
	char	*oename, *str;
	int	rv;

	mutex_enter(&vdp->xdf_cb_lk);

	/*
	 * Before trying to establish a connection we need to wait for the
	 * backend hotplug scripts to have run.  Once they are run the
	 * "<oename>/hotplug-status" property will be set to "connected".
	 */
	for (;;) {
		ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));

		/*
		 * Get the xenbus path to the backend device.  Note that
		 * we can't cache this path (and we look it up on each pass
		 * through this loop) because it could change during
		 * suspend, resume, and migration operations.
		 */
		if ((oename = xvdi_get_oename(dip)) == NULL) {
			mutex_exit(&vdp->xdf_cb_lk);
			return (B_FALSE);
		}

		str = NULL;
		if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
		    (strcmp(str, XBV_HP_STATUS_CONN) == 0))
			break;

		if (str != NULL)
			strfree(str);

		/* wait for an update to "<oename>/hotplug-status" */
		if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
			/* we got interrupted by a signal */
			mutex_exit(&vdp->xdf_cb_lk);
			return (B_FALSE);
		}
	}

	/* Good news.  The backend hotplug scripts have been run. */
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
	strfree(str);

	/*
	 * If we're emulating a cd device and the backend doesn't support
	 * media request operations, then we're not going to bother trying
	 * to establish a connection, for a couple of reasons.  First, media
	 * request support is required to support operations like eject and
	 * media locking.  Second, other backend platforms like Linux don't
	 * support hvm pv cdrom access.  They don't even have a backend pv
	 * driver for cdrom device nodes, so we don't want to block forever
	 * waiting for a connection to a backend driver that doesn't exist.
	 */
	if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (B_FALSE);
	}

	mutex_enter(&vdp->xdf_dev_lk);
	rv = xdf_connect_locked(vdp, B_TRUE);
	mutex_exit(&vdp->xdf_dev_lk);
	mutex_exit(&vdp->xdf_cb_lk);

	return ((rv == XD_READY) ?
B_TRUE : B_FALSE); 2255 } 2256 2257 int 2258 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2259 { 2260 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2261 2262 /* sanity check the requested physical geometry */ 2263 mutex_enter(&vdp->xdf_dev_lk); 2264 if ((geomp->g_secsize != XB_BSIZE) || 2265 (geomp->g_capacity == 0)) { 2266 mutex_exit(&vdp->xdf_dev_lk); 2267 return (EINVAL); 2268 } 2269 2270 /* 2271 * If we've already connected to the backend device then make sure 2272 * we're not defining a physical geometry larger than our backend 2273 * device. 2274 */ 2275 if ((vdp->xdf_xdev_nblocks != 0) && 2276 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2277 mutex_exit(&vdp->xdf_dev_lk); 2278 return (EINVAL); 2279 } 2280 2281 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2282 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2283 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2284 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2285 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2286 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2287 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2288 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2289 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2290 2291 vdp->xdf_pgeom_fixed = B_TRUE; 2292 mutex_exit(&vdp->xdf_dev_lk); 2293 2294 /* force a re-validation */ 2295 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2296 2297 return (0); 2298 } 2299 2300 boolean_t 2301 xdf_is_cd(dev_info_t *dip) 2302 { 2303 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2304 boolean_t rv; 2305 2306 mutex_enter(&vdp->xdf_cb_lk); 2307 rv = XD_IS_CD(vdp); 2308 mutex_exit(&vdp->xdf_cb_lk); 2309 return (rv); 2310 } 2311 2312 boolean_t 2313 xdf_is_rm(dev_info_t *dip) 2314 { 2315 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2316 boolean_t rv; 2317 2318 mutex_enter(&vdp->xdf_cb_lk); 2319 rv = XD_IS_RM(vdp); 2320 mutex_exit(&vdp->xdf_cb_lk); 2321 return (rv); 2322 } 2323 2324 boolean_t 2325 xdf_media_req_supported(dev_info_t *dip) 2326 { 2327 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2328 boolean_t rv; 2329 2330 mutex_enter(&vdp->xdf_cb_lk); 2331 rv = vdp->xdf_media_req_supported; 2332 mutex_exit(&vdp->xdf_cb_lk); 2333 return (rv); 2334 } 2335 2336 #endif /* XPV_HVM_DRIVER */ 2337 2338 static int 2339 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2340 { 2341 xdf_t *vdp; 2342 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2343 2344 if (vdp == NULL) 2345 return (ENXIO); 2346 2347 mutex_enter(&vdp->xdf_dev_lk); 2348 *capp = vdp->xdf_pgeom.g_capacity; 2349 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2350 mutex_exit(&vdp->xdf_dev_lk); 2351 return (0); 2352 } 2353 2354 static int 2355 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2356 { 2357 xdf_t *vdp; 2358 2359 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2360 return (ENXIO); 2361 *geomp = vdp->xdf_pgeom; 2362 return (0); 2363 } 2364 2365 /* 2366 * No real HBA, no geometry available from it 2367 */ 2368 /*ARGSUSED*/ 2369 static int 2370 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2371 { 2372 return (EINVAL); 2373 } 2374 2375 static int 2376 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2377 { 2378 xdf_t *vdp; 2379 2380 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2381 return (ENXIO); 2382 2383 if (XD_IS_RO(vdp)) 2384 tgattributep->media_is_writable = 0; 2385 else 2386 tgattributep->media_is_writable = 1; 2387 tgattributep->media_is_rotational = 0; 2388 return (0); 2389 } 2390 2391 /* ARGSUSED3 */ 2392 int 2393 xdf_lb_getinfo(dev_info_t *dip, int 
    cmd, void *arg, void *tg_cookie)
{
	int instance;
	xdf_t	*vdp;

	instance = ddi_get_instance(dip);

	if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
		return (ENXIO);

	switch (cmd) {
	case TG_GETPHYGEOM:
		return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
	case TG_GETVIRTGEOM:
		return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
	case TG_GETCAPACITY:
		return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
	case TG_GETBLOCKSIZE:
		mutex_enter(&vdp->xdf_cb_lk);
		*(uint32_t *)arg = vdp->xdf_xdev_secsize;
		mutex_exit(&vdp->xdf_cb_lk);
		return (0);
	case TG_GETATTR:
		return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
	default:
		return (ENOTTY);
	}
}

/* ARGSUSED5 */
int
xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
    diskaddr_t start, size_t reqlen, void *tg_cookie)
{
	xdf_t *vdp;
	struct buf *bp;
	int err = 0;

	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));

	/* We don't allow IO from the oe_change callback thread */
	ASSERT(curthread != vdp->xdf_oe_change_thread);

	/*
	 * A secsize of 0 means that the device isn't connected yet.
	 * FIXME This happens for CD devices, and there's nothing we
	 * can do about it at the moment.
	 */
	if (vdp->xdf_xdev_secsize == 0)
		return (EIO);

	if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
	    >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
		return (EINVAL);

	bp = getrbuf(KM_SLEEP);
	if (cmd == TG_READ)
		bp->b_flags = B_BUSY | B_READ;
	else
		bp->b_flags = B_BUSY | B_WRITE;

	bp->b_un.b_addr = bufp;
	bp->b_bcount = reqlen;
	bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
	bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */

	mutex_enter(&vdp->xdf_dev_lk);
	xdf_bp_push(vdp, bp);
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_io_start(vdp);
	if (curthread == vdp->xdf_ready_tq_thread)
		(void) xdf_ring_drain(vdp);
	err = biowait(bp);
	ASSERT(bp->b_flags & B_DONE);
	freerbuf(bp);
	return (err);
}

/*
 * Lock the current media.  Set the media state to "lock".
 * (Media locks are only respected by the backend driver.)
 */
static int
xdf_ioctl_mlock(xdf_t *vdp)
{
	int rv;
	mutex_enter(&vdp->xdf_cb_lk);
	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

/*
 * Release a media lock.  Set the media state to "none".
 */
static int
xdf_ioctl_munlock(xdf_t *vdp)
{
	int rv;
	mutex_enter(&vdp->xdf_cb_lk);
	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
	mutex_exit(&vdp->xdf_cb_lk);
	return (rv);
}

/*
 * Eject the current media.  Ignores any media locks.  (Media locks
 * are only for the benefit of the backend.)
 */
static int
xdf_ioctl_eject(xdf_t *vdp)
{
	int rv;

	mutex_enter(&vdp->xdf_cb_lk);
	if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
		mutex_exit(&vdp->xdf_cb_lk);
		return (rv);
	}

	/*
	 * We've set the media request xenbus parameter to eject, so now
	 * disconnect from the backend, wait for the backend to clear
	 * the media request xenbus parameter, and then we can reconnect
	 * to the backend.
2518 */ 2519 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2520 mutex_enter(&vdp->xdf_dev_lk); 2521 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2522 mutex_exit(&vdp->xdf_dev_lk); 2523 mutex_exit(&vdp->xdf_cb_lk); 2524 return (EIO); 2525 } 2526 mutex_exit(&vdp->xdf_dev_lk); 2527 mutex_exit(&vdp->xdf_cb_lk); 2528 return (0); 2529 } 2530 2531 /* 2532 * Watch for media state changes. This can be an insertion of a device 2533 * (triggered by a 'xm block-configure' request in another domain) or 2534 * the ejection of a device (triggered by a local "eject" operation). 2535 * For a full description of the DKIOCSTATE ioctl behavior see dkio(4I). 2536 */ 2537 static int 2538 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2539 { 2540 enum dkio_state prev_state; 2541 2542 mutex_enter(&vdp->xdf_cb_lk); 2543 prev_state = vdp->xdf_mstate; 2544 2545 if (vdp->xdf_mstate == mstate) { 2546 while (vdp->xdf_mstate == prev_state) { 2547 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2548 &vdp->xdf_cb_lk) == 0) { 2549 mutex_exit(&vdp->xdf_cb_lk); 2550 return (EINTR); 2551 } 2552 } 2553 } 2554 2555 if ((prev_state != DKIO_INSERTED) && 2556 (vdp->xdf_mstate == DKIO_INSERTED)) { 2557 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2558 mutex_exit(&vdp->xdf_cb_lk); 2559 return (0); 2560 } 2561 2562 mutex_exit(&vdp->xdf_cb_lk); 2563 return (0); 2564 } 2565 2566 /*ARGSUSED*/ 2567 static int 2568 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2569 int *rvalp) 2570 { 2571 minor_t minor = getminor(dev); 2572 int part = XDF_PART(minor); 2573 xdf_t *vdp; 2574 int rv; 2575 2576 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2577 (!xdf_isopen(vdp, part))) 2578 return (ENXIO); 2579 2580 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2581 vdp->xdf_addr, cmd, cmd)); 2582 2583 switch (cmd) { 2584 default: 2585 return (ENOTTY); 2586 case DKIOCG_PHYGEOM: 2587 case DKIOCG_VIRTGEOM: 2588 case DKIOCGGEOM: 2589 case DKIOCSGEOM: 2590 case DKIOCGAPART: 2591 case DKIOCSAPART: 2592 case DKIOCGVTOC: 2593 case DKIOCSVTOC: 2594 case DKIOCPARTINFO: 2595 case DKIOCGEXTVTOC: 2596 case DKIOCSEXTVTOC: 2597 case DKIOCEXTPARTINFO: 2598 case DKIOCGMBOOT: 2599 case DKIOCSMBOOT: 2600 case DKIOCGETEFI: 2601 case DKIOCSETEFI: 2602 case DKIOCSETEXTPART: 2603 case DKIOCPARTITION: 2604 rv = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2605 rvalp, NULL); 2606 if (rv != 0) 2607 return (rv); 2608 /* 2609 * If we're labelling the disk, we have to update the geometry 2610 * in the cmlb data structures, and we also have to write a new 2611 * devid to the disk. Note that writing an EFI label currently 2612 * requires 4 ioctls, and devid setup will fail on all but the 2613 * last. 
		 */
		if (cmd == DKIOCSEXTVTOC || cmd == DKIOCSVTOC ||
		    cmd == DKIOCSETEFI) {
			rv = cmlb_validate(vdp->xdf_vd_lbl, 0, 0);
			if (rv == 0) {
				xdf_devid_setup(vdp);
			} else {
				cmn_err(CE_WARN,
				    "xdf@%s: labeling failed on validate",
				    vdp->xdf_addr);
			}
		}
		return (rv);
	case FDEJECT:
	case DKIOCEJECT:
	case CDROMEJECT:
		return (xdf_ioctl_eject(vdp));
	case DKIOCLOCK:
		return (xdf_ioctl_mlock(vdp));
	case DKIOCUNLOCK:
		return (xdf_ioctl_munlock(vdp));
	case CDROMREADOFFSET: {
		int offset = 0;
		if (!XD_IS_CD(vdp))
			return (ENOTTY);
		if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCGMEDIAINFO: {
		struct dk_minfo media_info;

		media_info.dki_lbsize = vdp->xdf_xdev_secsize;
		media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
		if (XD_IS_CD(vdp))
			media_info.dki_media_type = DK_CDROM;
		else
			media_info.dki_media_type = DK_FIXED_DISK;

		if (ddi_copyout(&media_info, (void *)arg,
		    sizeof (struct dk_minfo), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo info;

		/* controller information */
		if (XD_IS_CD(vdp))
			info.dki_ctype = DKC_CDROM;
		else
			info.dki_ctype = DKC_VBD;

		info.dki_cnum = 0;
		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);

		/* unit information */
		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
		info.dki_flags = DKI_FMTVOL;
		info.dki_partition = part;
		info.dki_maxtransfer = maxphys / DEV_BSIZE;
		info.dki_addr = 0;
		info.dki_space = 0;
		info.dki_prio = 0;
		info.dki_vec = 0;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state mstate;

		if (ddi_copyin((void *)arg, &mstate,
		    sizeof (mstate), mode) != 0)
			return (EFAULT);
		if ((rv = xdf_dkstate(vdp, mstate)) != 0)
			return (rv);
		mstate = vdp->xdf_mstate;
		if (ddi_copyout(&mstate, (void *)arg,
		    sizeof (mstate), mode) != 0)
			return (EFAULT);
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i = BOOLEAN2VOID(XD_IS_RM(vdp));
		if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCGETWCE: {
		int i = BOOLEAN2VOID(vdp->xdf_wce);
		if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
			return (EFAULT);
		return (0);
	}
	case DKIOCSETWCE: {
		int i;
		if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
			return (EFAULT);
		vdp->xdf_wce = VOID2BOOLEAN(i);
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = (struct dk_callback *)arg;

		if (vdp->xdf_flush_supported) {
			rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    NULL, 0, 0, (void *)dev);
		} else if (vdp->xdf_feature_barrier &&
		    !xdf_barrier_flush_disable) {
			rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
			    vdp->xdf_cache_flush_block, xdf_flush_block,
			    vdp->xdf_xdev_secsize, (void *)dev);
		} else {
			return (ENOTTY);
		}
		if ((mode & FKIOCTL) && (dkc != NULL) &&
		    (dkc->dkc_callback != NULL)) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
			/* need to return 0 after calling callback */
			rv = 0;
		}
		return (rv);
	}
	}
	/*NOTREACHED*/
}

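/*
 * strategy(9E) entry point.  Validate the buf against the open state and
 * the partition (or whole-disk) limits, convert b_blkno from DEV_BSIZE
 * units into backend sectors, trim any transfer that runs past the end of
 * the partition, then queue the buf and kick off xdf_io_start().  When
 * polled IO is in effect the ring is drained before returning.
 */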
static int
xdf_strategy(struct buf *bp)
{
	xdf_t	*vdp;
	minor_t minor;
	diskaddr_t p_blkct, p_blkst;
	daddr_t blkno;
	ulong_t nblks;
	int part;

	minor = getminor(bp->b_edev);
	part = XDF_PART(minor);
	vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));

	mutex_enter(&vdp->xdf_dev_lk);
	if (!xdf_isopen(vdp, part)) {
		mutex_exit(&vdp->xdf_dev_lk);
		xdf_io_err(bp, ENXIO, 0);
		return (0);
	}

	/* We don't allow IO from the oe_change callback thread */
	ASSERT(curthread != vdp->xdf_oe_change_thread);

	/* Check for writes to a read only device */
	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
		mutex_exit(&vdp->xdf_dev_lk);
		xdf_io_err(bp, EROFS, 0);
		return (0);
	}

	/* Check if this I/O is accessing a partition or the entire disk */
	if ((long)bp->b_private == XB_SLICE_NONE) {
		/* This I/O is using an absolute offset */
		p_blkct = vdp->xdf_xdev_nblocks;
		p_blkst = 0;
	} else {
		/* This I/O is using a partition relative offset */
		mutex_exit(&vdp->xdf_dev_lk);
		if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
		    &p_blkst, NULL, NULL, NULL)) {
			xdf_io_err(bp, ENXIO, 0);
			return (0);
		}
		mutex_enter(&vdp->xdf_dev_lk);
	}

	/*
	 * Adjust the real blkno and bcount according to the underlying
	 * physical sector size.
	 */
	blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);

	/* check for a starting block beyond the disk or partition limit */
	if (blkno > p_blkct) {
		DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
		    vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
		mutex_exit(&vdp->xdf_dev_lk);
		xdf_io_err(bp, EINVAL, 0);
		return (0);
	}

	/* Legacy: don't set the error flag in this case */
	if (blkno == p_blkct) {
		mutex_exit(&vdp->xdf_dev_lk);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* sanitize the input buf */
	bioerror(bp, 0);
	bp->b_resid = 0;
	bp->av_back = bp->av_forw = NULL;

	/* Adjust for a partial transfer; this will result in an error later */
	if (vdp->xdf_xdev_secsize != 0 &&
	    vdp->xdf_xdev_secsize != XB_BSIZE) {
		nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
	} else {
		nblks = bp->b_bcount >> XB_BSHIFT;
	}

	if ((blkno + nblks) > p_blkct) {
		if (vdp->xdf_xdev_secsize != 0 &&
		    vdp->xdf_xdev_secsize != XB_BSIZE) {
			bp->b_resid =
			    ((blkno + nblks) - p_blkct) *
			    vdp->xdf_xdev_secsize;
		} else {
			bp->b_resid =
			    ((blkno + nblks) - p_blkct) <<
			    XB_BSHIFT;
		}
		bp->b_bcount -= bp->b_resid;
	}

	DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
	    vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));

	/* Fix up the buf struct */
	bp->b_flags |= B_BUSY;
	bp->b_private = (void *)(uintptr_t)p_blkst;

	xdf_bp_push(vdp, bp);
	mutex_exit(&vdp->xdf_dev_lk);
	xdf_io_start(vdp);
	if (do_polled_io)
		(void) xdf_ring_drain(vdp);
	return (0);
}

/*ARGSUSED*/
static int
xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
	xdf_t	*vdp;
	minor_t minor;
	diskaddr_t p_blkcnt;
	int part;

	minor = getminor(dev);
	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
		return (ENXIO);

	DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
	    vdp->xdf_addr,
(int64_t)uiop->uio_offset)); 2871 2872 part = XDF_PART(minor); 2873 if (!xdf_isopen(vdp, part)) 2874 return (ENXIO); 2875 2876 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2877 NULL, NULL, NULL, NULL)) 2878 return (ENXIO); 2879 2880 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2881 return (ENOSPC); 2882 2883 if (U_INVAL(uiop)) 2884 return (EINVAL); 2885 2886 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2887 } 2888 2889 /*ARGSUSED*/ 2890 static int 2891 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2892 { 2893 xdf_t *vdp; 2894 minor_t minor; 2895 diskaddr_t p_blkcnt; 2896 int part; 2897 2898 minor = getminor(dev); 2899 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2900 return (ENXIO); 2901 2902 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2903 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2904 2905 part = XDF_PART(minor); 2906 if (!xdf_isopen(vdp, part)) 2907 return (ENXIO); 2908 2909 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2910 NULL, NULL, NULL, NULL)) 2911 return (ENXIO); 2912 2913 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2914 return (ENOSPC); 2915 2916 if (U_INVAL(uiop)) 2917 return (EINVAL); 2918 2919 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2920 } 2921 2922 /*ARGSUSED*/ 2923 static int 2924 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2925 { 2926 xdf_t *vdp; 2927 minor_t minor; 2928 struct uio *uiop = aiop->aio_uio; 2929 diskaddr_t p_blkcnt; 2930 int part; 2931 2932 minor = getminor(dev); 2933 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2934 return (ENXIO); 2935 2936 part = XDF_PART(minor); 2937 if (!xdf_isopen(vdp, part)) 2938 return (ENXIO); 2939 2940 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2941 NULL, NULL, NULL, NULL)) 2942 return (ENXIO); 2943 2944 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2945 return (ENOSPC); 2946 2947 if (U_INVAL(uiop)) 2948 return (EINVAL); 2949 2950 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2951 } 2952 2953 /*ARGSUSED*/ 2954 static int 2955 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2956 { 2957 xdf_t *vdp; 2958 minor_t minor; 2959 struct uio *uiop = aiop->aio_uio; 2960 diskaddr_t p_blkcnt; 2961 int part; 2962 2963 minor = getminor(dev); 2964 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2965 return (ENXIO); 2966 2967 part = XDF_PART(minor); 2968 if (!xdf_isopen(vdp, part)) 2969 return (ENXIO); 2970 2971 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2972 NULL, NULL, NULL, NULL)) 2973 return (ENXIO); 2974 2975 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2976 return (ENOSPC); 2977 2978 if (U_INVAL(uiop)) 2979 return (EINVAL); 2980 2981 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2982 } 2983 2984 static int 2985 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2986 { 2987 struct buf dumpbuf, *dbp = &dumpbuf; 2988 xdf_t *vdp; 2989 minor_t minor; 2990 int err = 0; 2991 int part; 2992 diskaddr_t p_blkcnt, p_blkst; 2993 2994 minor = getminor(dev); 2995 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2996 return (ENXIO); 2997 2998 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2999 vdp->xdf_addr, (void *)addr, blkno, nblk)); 3000 3001 /* We don't allow IO from the oe_change callback thread */ 3002 ASSERT(curthread != vdp->xdf_oe_change_thread); 3003 3004 part = XDF_PART(minor); 3005 if (!xdf_isopen(vdp, part)) 3006 return (ENXIO); 3007 3008 if 
(cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 3009 NULL, NULL, NULL)) 3010 return (ENXIO); 3011 3012 if ((blkno + nblk) > 3013 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 3014 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 3015 vdp->xdf_addr, (daddr_t)((blkno + nblk) / 3016 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 3017 return (EINVAL); 3018 } 3019 3020 bioinit(dbp); 3021 dbp->b_flags = B_BUSY; 3022 dbp->b_un.b_addr = addr; 3023 dbp->b_bcount = nblk << DEV_BSHIFT; 3024 dbp->b_blkno = blkno; 3025 dbp->b_edev = dev; 3026 dbp->b_private = (void *)(uintptr_t)p_blkst; 3027 3028 mutex_enter(&vdp->xdf_dev_lk); 3029 xdf_bp_push(vdp, dbp); 3030 mutex_exit(&vdp->xdf_dev_lk); 3031 xdf_io_start(vdp); 3032 err = xdf_ring_drain(vdp); 3033 biofini(dbp); 3034 return (err); 3035 } 3036 3037 /*ARGSUSED*/ 3038 static int 3039 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 3040 { 3041 minor_t minor; 3042 xdf_t *vdp; 3043 int part; 3044 ulong_t parbit; 3045 3046 minor = getminor(dev); 3047 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3048 return (ENXIO); 3049 3050 mutex_enter(&vdp->xdf_dev_lk); 3051 part = XDF_PART(minor); 3052 if (!xdf_isopen(vdp, part)) { 3053 mutex_exit(&vdp->xdf_dev_lk); 3054 return (ENXIO); 3055 } 3056 parbit = 1 << part; 3057 3058 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3059 if (otyp == OTYP_LYR) { 3060 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3061 if (--vdp->xdf_vd_lyropen[part] == 0) 3062 vdp->xdf_vd_open[otyp] &= ~parbit; 3063 } else { 3064 vdp->xdf_vd_open[otyp] &= ~parbit; 3065 } 3066 vdp->xdf_vd_exclopen &= ~parbit; 3067 3068 mutex_exit(&vdp->xdf_dev_lk); 3069 return (0); 3070 } 3071 3072 static int 3073 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3074 { 3075 minor_t minor; 3076 xdf_t *vdp; 3077 int part; 3078 ulong_t parbit; 3079 diskaddr_t p_blkct = 0; 3080 boolean_t firstopen; 3081 boolean_t nodelay; 3082 3083 minor = getminor(*devp); 3084 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3085 return (ENXIO); 3086 3087 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3088 3089 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3090 3091 /* do cv_wait until connected or failed */ 3092 mutex_enter(&vdp->xdf_cb_lk); 3093 mutex_enter(&vdp->xdf_dev_lk); 3094 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3095 mutex_exit(&vdp->xdf_dev_lk); 3096 mutex_exit(&vdp->xdf_cb_lk); 3097 return (ENXIO); 3098 } 3099 mutex_exit(&vdp->xdf_cb_lk); 3100 3101 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3102 mutex_exit(&vdp->xdf_dev_lk); 3103 return (EROFS); 3104 } 3105 3106 part = XDF_PART(minor); 3107 parbit = 1 << part; 3108 if ((vdp->xdf_vd_exclopen & parbit) || 3109 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3110 mutex_exit(&vdp->xdf_dev_lk); 3111 return (EBUSY); 3112 } 3113 3114 /* are we the first one to open this node? 
*/ 3115 firstopen = !xdf_isopen(vdp, -1); 3116 3117 if (otyp == OTYP_LYR) 3118 vdp->xdf_vd_lyropen[part]++; 3119 3120 vdp->xdf_vd_open[otyp] |= parbit; 3121 3122 if (flag & FEXCL) 3123 vdp->xdf_vd_exclopen |= parbit; 3124 3125 mutex_exit(&vdp->xdf_dev_lk); 3126 3127 /* force a re-validation */ 3128 if (firstopen) 3129 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3130 3131 /* If this is a non-blocking open then we're done */ 3132 if (nodelay) 3133 return (0); 3134 3135 /* 3136 * This is a blocking open, so we require: 3137 * - that the disk have a valid label on it 3138 * - that the size of the partition that we're opening is non-zero 3139 */ 3140 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3141 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3142 (void) xdf_close(*devp, flag, otyp, credp); 3143 return (ENXIO); 3144 } 3145 3146 return (0); 3147 } 3148 3149 /*ARGSUSED*/ 3150 static void 3151 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3152 { 3153 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 3154 cv_broadcast(&vdp->xdf_hp_status_cv); 3155 } 3156 3157 static int 3158 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3159 char *name, caddr_t valuep, int *lengthp) 3160 { 3161 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3162 3163 /* 3164 * Sanity check that if a dev_t or dip were specified that they 3165 * correspond to this device driver. On debug kernels we'll 3166 * panic and on non-debug kernels we'll return failure. 3167 */ 3168 ASSERT(ddi_driver_major(dip) == xdf_major); 3169 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3170 if ((ddi_driver_major(dip) != xdf_major) || 3171 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3172 return (DDI_PROP_NOT_FOUND); 3173 3174 if (vdp == NULL) 3175 return (ddi_prop_op(dev, dip, prop_op, flags, 3176 name, valuep, lengthp)); 3177 3178 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3179 dev, dip, prop_op, flags, name, valuep, lengthp, 3180 XDF_PART(getminor(dev)), NULL)); 3181 } 3182 3183 /*ARGSUSED*/ 3184 static int 3185 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3186 { 3187 int instance = XDF_INST(getminor((dev_t)arg)); 3188 xdf_t *vbdp; 3189 3190 switch (cmd) { 3191 case DDI_INFO_DEVT2DEVINFO: 3192 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3193 *rp = NULL; 3194 return (DDI_FAILURE); 3195 } 3196 *rp = vbdp->xdf_dip; 3197 return (DDI_SUCCESS); 3198 3199 case DDI_INFO_DEVT2INSTANCE: 3200 *rp = (void *)(uintptr_t)instance; 3201 return (DDI_SUCCESS); 3202 3203 default: 3204 return (DDI_FAILURE); 3205 } 3206 } 3207 3208 /*ARGSUSED*/ 3209 static int 3210 xdf_resume(dev_info_t *dip) 3211 { 3212 xdf_t *vdp; 3213 char *oename; 3214 3215 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3216 goto err; 3217 3218 if (xdf_debug & SUSRES_DBG) 3219 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3220 3221 mutex_enter(&vdp->xdf_cb_lk); 3222 3223 if (xvdi_resume(dip) != DDI_SUCCESS) { 3224 mutex_exit(&vdp->xdf_cb_lk); 3225 goto err; 3226 } 3227 3228 if (((oename = xvdi_get_oename(dip)) == NULL) || 3229 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3230 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3231 mutex_exit(&vdp->xdf_cb_lk); 3232 goto err; 3233 } 3234 3235 mutex_enter(&vdp->xdf_dev_lk); 3236 ASSERT(vdp->xdf_state != XD_READY); 3237 xdf_set_state(vdp, XD_UNKNOWN); 3238 mutex_exit(&vdp->xdf_dev_lk); 3239 3240 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3241 
mutex_exit(&vdp->xdf_cb_lk); 3242 goto err; 3243 } 3244 3245 mutex_exit(&vdp->xdf_cb_lk); 3246 3247 if (xdf_debug & SUSRES_DBG) 3248 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3249 return (DDI_SUCCESS); 3250 err: 3251 if (xdf_debug & SUSRES_DBG) 3252 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3253 return (DDI_FAILURE); 3254 } 3255 3256 /* 3257 * Uses the in-memory devid if one exists. 3258 * 3259 * Create a devid and write it on the first block of the last track of 3260 * the last cylinder. 3261 * Return DDI_SUCCESS or DDI_FAILURE. 3262 */ 3263 static int 3264 xdf_devid_fabricate(xdf_t *vdp) 3265 { 3266 ddi_devid_t devid = vdp->xdf_tgt_devid; /* null if no devid */ 3267 struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */ 3268 diskaddr_t blk; 3269 uint_t *ip, chksum; 3270 int i, devid_size; 3271 3272 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0) 3273 goto err; 3274 3275 if (devid == NULL && ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, 3276 NULL, &devid) != DDI_SUCCESS) 3277 goto err; 3278 3279 /* allocate a buffer */ 3280 dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP); 3281 3282 /* Fill in the revision */ 3283 dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB; 3284 dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB; 3285 3286 /* Copy in the device id */ 3287 devid_size = ddi_devid_sizeof(devid); 3288 if (devid_size > DK_DEVID_SIZE) 3289 goto err; 3290 bcopy(devid, dkdevidp->dkd_devid, devid_size); 3291 3292 /* Calculate the chksum */ 3293 chksum = 0; 3294 ip = (uint_t *)dkdevidp; 3295 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++) 3296 chksum ^= ip[i]; 3297 3298 /* Fill in the checksum */ 3299 DKD_FORMCHKSUM(chksum, dkdevidp); 3300 3301 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, dkdevidp, blk, 3302 NBPSCTR, NULL) != 0) 3303 goto err; 3304 3305 kmem_free(dkdevidp, NBPSCTR); 3306 3307 vdp->xdf_tgt_devid = devid; 3308 return (DDI_SUCCESS); 3309 3310 err: 3311 if (dkdevidp != NULL) 3312 kmem_free(dkdevidp, NBPSCTR); 3313 if (devid != NULL && vdp->xdf_tgt_devid == NULL) 3314 ddi_devid_free(devid); 3315 return (DDI_FAILURE); 3316 } 3317 3318 /* 3319 * xdf_devid_read() is a local copy of xdfs_devid_read(), modified to use xdf 3320 * functions. 3321 * 3322 * Read a devid from on the first block of the last track of 3323 * the last cylinder. Make sure what we read is a valid devid. 3324 * Return DDI_SUCCESS or DDI_FAILURE. 
3325 */ 3326 static int 3327 xdf_devid_read(xdf_t *vdp) 3328 { 3329 diskaddr_t blk; 3330 struct dk_devid *dkdevidp; 3331 uint_t *ip, chksum; 3332 int i; 3333 3334 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0) 3335 return (DDI_FAILURE); 3336 3337 dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP); 3338 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, dkdevidp, blk, 3339 NBPSCTR, NULL) != 0) 3340 goto err; 3341 3342 /* Validate the revision */ 3343 if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) || 3344 (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB)) 3345 goto err; 3346 3347 /* Calculate the checksum */ 3348 chksum = 0; 3349 ip = (uint_t *)dkdevidp; 3350 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++) 3351 chksum ^= ip[i]; 3352 if (DKD_GETCHKSUM(dkdevidp) != chksum) 3353 goto err; 3354 3355 /* Validate the device id */ 3356 if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS) 3357 goto err; 3358 3359 /* keep a copy of the device id */ 3360 i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid); 3361 vdp->xdf_tgt_devid = kmem_alloc(i, KM_SLEEP); 3362 bcopy(dkdevidp->dkd_devid, vdp->xdf_tgt_devid, i); 3363 kmem_free(dkdevidp, NBPSCTR); 3364 return (DDI_SUCCESS); 3365 3366 err: 3367 kmem_free(dkdevidp, NBPSCTR); 3368 return (DDI_FAILURE); 3369 } 3370 3371 /* 3372 * xdf_devid_setup() is a modified copy of cmdk_devid_setup(). 3373 * 3374 * This function creates a devid if we don't already have one, and 3375 * registers it. If we already have one, we make sure that it can be 3376 * read from the disk, otherwise we write it to the disk ourselves. If 3377 * we didn't already have a devid, and we create one, we also need to 3378 * register it. 3379 */ 3380 void 3381 xdf_devid_setup(xdf_t *vdp) 3382 { 3383 int rc; 3384 boolean_t existed = vdp->xdf_tgt_devid != NULL; 3385 3386 /* Read devid from the disk, if present */ 3387 rc = xdf_devid_read(vdp); 3388 3389 /* Otherwise write a devid (which we create if necessary) on the disk */ 3390 if (rc != DDI_SUCCESS) 3391 rc = xdf_devid_fabricate(vdp); 3392 3393 /* If we created a devid or found it on the disk, register it */ 3394 if (rc == DDI_SUCCESS && !existed) 3395 (void) ddi_devid_register(vdp->xdf_dip, vdp->xdf_tgt_devid); 3396 } 3397 3398 static int 3399 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3400 { 3401 int n, instance = ddi_get_instance(dip); 3402 ddi_iblock_cookie_t ibc, softibc; 3403 boolean_t dev_iscd = B_FALSE; 3404 xdf_t *vdp; 3405 char *oename, *xsname, *str; 3406 clock_t timeout; 3407 int err = 0; 3408 3409 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3410 "xdf_debug", 0)) != 0) 3411 xdf_debug = n; 3412 3413 switch (cmd) { 3414 case DDI_RESUME: 3415 return (xdf_resume(dip)); 3416 case DDI_ATTACH: 3417 break; 3418 default: 3419 return (DDI_FAILURE); 3420 } 3421 /* DDI_ATTACH */ 3422 3423 if ((xsname = xvdi_get_xsname(dip)) == NULL || 3424 (oename = xvdi_get_oename(dip)) == NULL) 3425 return (DDI_FAILURE); 3426 3427 /* 3428 * Disable auto-detach. This is necessary so that we don't get 3429 * detached while we're disconnected from the back end. 
3430 */ 3431 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3432 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3433 return (DDI_FAILURE); 3434 3435 /* driver handles kernel-issued IOCTLs */ 3436 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3437 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3438 return (DDI_FAILURE); 3439 3440 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3441 return (DDI_FAILURE); 3442 3443 if (ddi_get_soft_iblock_cookie(dip, 3444 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3445 return (DDI_FAILURE); 3446 3447 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3448 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3449 ddi_get_name_addr(dip)); 3450 return (DDI_FAILURE); 3451 } 3452 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3453 dev_iscd = B_TRUE; 3454 strfree(str); 3455 3456 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3457 return (DDI_FAILURE); 3458 3459 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3460 vdp = ddi_get_soft_state(xdf_ssp, instance); 3461 ddi_set_driver_private(dip, vdp); 3462 vdp->xdf_dip = dip; 3463 vdp->xdf_addr = ddi_get_name_addr(dip); 3464 vdp->xdf_suspending = B_FALSE; 3465 vdp->xdf_media_req_supported = B_FALSE; 3466 vdp->xdf_peer = INVALID_DOMID; 3467 vdp->xdf_evtchn = INVALID_EVTCHN; 3468 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3469 offsetof(v_req_t, v_link)); 3470 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3471 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3472 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3473 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3474 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3475 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3476 vdp->xdf_cmlb_reattach = B_TRUE; 3477 if (dev_iscd) { 3478 vdp->xdf_dinfo |= VDISK_CDROM; 3479 vdp->xdf_mstate = DKIO_EJECTED; 3480 } else { 3481 vdp->xdf_mstate = DKIO_NONE; 3482 } 3483 3484 if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3485 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3486 goto errout0; 3487 3488 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3489 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3490 goto errout0; 3491 3492 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3493 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3494 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3495 ddi_get_name_addr(dip)); 3496 goto errout0; 3497 } 3498 3499 /* 3500 * Initialize the physical geometry stucture. Note that currently 3501 * we don't know the size of the backend device so the number 3502 * of blocks on the device will be initialized to zero. Once 3503 * we connect to the backend device we'll update the physical 3504 * geometry to reflect the real size of the device. 3505 */ 3506 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3507 vdp->xdf_pgeom_fixed = B_FALSE; 3508 3509 /* 3510 * Allocate the cmlb handle, minor nodes will be created once 3511 * the device is connected with backend. 
3512 */ 3513 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3514 3515 /* We ship with cache-enabled disks */ 3516 vdp->xdf_wce = B_TRUE; 3517 3518 mutex_enter(&vdp->xdf_cb_lk); 3519 /* Watch backend XenbusState change */ 3520 if (xvdi_add_event_handler(dip, 3521 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3522 mutex_exit(&vdp->xdf_cb_lk); 3523 goto errout0; 3524 } 3525 3526 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3527 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3528 ddi_get_name_addr(dip)); 3529 mutex_exit(&vdp->xdf_cb_lk); 3530 goto errout1; 3531 } 3532 3533 /* Nothing else to do for CD devices */ 3534 if (dev_iscd) { 3535 mutex_exit(&vdp->xdf_cb_lk); 3536 goto done; 3537 } 3538 3539 /* 3540 * In order to do cmlb_validate, we have to wait for the disk to 3541 * acknowledge the attach, so we can query the backend for the disk 3542 * geometry (see xdf_setstate_connected). 3543 * 3544 * We only wait 30 seconds; if this is the root disk, the boot 3545 * will fail, but it would fail anyway if the device never 3546 * connected. If this is a non-boot disk, that disk will fail 3547 * to connect, but again, it would fail anyway. 3548 */ 3549 timeout = ddi_get_lbolt() + drv_usectohz(XDF_STATE_TIMEOUT); 3550 while (vdp->xdf_state != XD_CONNECTED && vdp->xdf_state != XD_READY) { 3551 if (cv_timedwait(&vdp->xdf_dev_cv, &vdp->xdf_cb_lk, 3552 timeout) < 0) { 3553 cmn_err(CE_WARN, "xdf@%s: disk failed to connect", 3554 ddi_get_name_addr(dip)); 3555 mutex_exit(&vdp->xdf_cb_lk); 3556 goto errout1; 3557 } 3558 } 3559 mutex_exit(&vdp->xdf_cb_lk); 3560 3561 /* 3562 * We call cmlb_validate so that the geometry information in 3563 * vdp->xdf_vd_lbl is correct; this fills out the number of 3564 * alternate cylinders so that we have a place to write the 3565 * devid. 3566 */ 3567 if ((err = cmlb_validate(vdp->xdf_vd_lbl, 0, NULL)) != 0) { 3568 cmn_err(CE_NOTE, 3569 "xdf@%s: cmlb_validate failed: %d", 3570 ddi_get_name_addr(dip), err); 3571 /* 3572 * We can carry on even if cmlb_validate() returns EINVAL here, 3573 * as we'll rewrite the disk label anyway. 3574 */ 3575 if (err != EINVAL) 3576 goto errout1; 3577 } 3578 3579 /* 3580 * xdf_devid_setup will only write a devid if one isn't 3581 * already present. If it fails to find or create one, we 3582 * create one in-memory so that when we label the disk later, 3583 * it will have a devid to use. This is helpful to deal with 3584 * cases where people use the devids of their disks before 3585 * labelling them; note that this does cause problems if 3586 * people rely on the devids of unlabelled disks to persist 3587 * across reboot. 
3588 */ 3589 xdf_devid_setup(vdp); 3590 if (vdp->xdf_tgt_devid == NULL) { 3591 if (ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, NULL, 3592 &vdp->xdf_tgt_devid) != DDI_SUCCESS) { 3593 cmn_err(CE_WARN, 3594 "xdf@%s_ attach failed, devid_init failed", 3595 ddi_get_name_addr(dip)); 3596 goto errout1; 3597 } else { 3598 (void) ddi_devid_register(vdp->xdf_dip, 3599 vdp->xdf_tgt_devid); 3600 } 3601 } 3602 3603 done: 3604 #ifdef XPV_HVM_DRIVER 3605 xdf_hvm_add(dip); 3606 3607 /* Report our version to dom0 */ 3608 (void) xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3609 HVMPV_XDF_VERS); 3610 #endif /* XPV_HVM_DRIVER */ 3611 3612 /* Create kstat for iostat(8) */ 3613 if (xdf_kstat_create(dip) != 0) { 3614 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3615 ddi_get_name_addr(dip)); 3616 goto errout1; 3617 } 3618 3619 /* 3620 * Don't bother with getting real device identification 3621 * strings (is it even possible?), they are unlikely to 3622 * change often (if at all). 3623 */ 3624 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_VENDOR_ID, 3625 "Xen"); 3626 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_PRODUCT_ID, 3627 dev_iscd ? "Virtual CD" : "Virtual disk"); 3628 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_REVISION_ID, 3629 "1.0"); 3630 3631 ddi_report_dev(dip); 3632 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3633 return (DDI_SUCCESS); 3634 3635 errout1: 3636 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3637 xvdi_remove_event_handler(dip, XS_OE_STATE); 3638 errout0: 3639 if (vdp->xdf_vd_lbl != NULL) { 3640 cmlb_free_handle(&vdp->xdf_vd_lbl); 3641 vdp->xdf_vd_lbl = NULL; 3642 } 3643 if (vdp->xdf_softintr_id != NULL) 3644 ddi_remove_softintr(vdp->xdf_softintr_id); 3645 xvdi_remove_xb_watch_handlers(dip); 3646 if (vdp->xdf_ready_tq != NULL) 3647 ddi_taskq_destroy(vdp->xdf_ready_tq); 3648 mutex_destroy(&vdp->xdf_cb_lk); 3649 mutex_destroy(&vdp->xdf_dev_lk); 3650 cv_destroy(&vdp->xdf_dev_cv); 3651 cv_destroy(&vdp->xdf_hp_status_cv); 3652 ddi_soft_state_free(xdf_ssp, instance); 3653 ddi_set_driver_private(dip, NULL); 3654 ddi_prop_remove_all(dip); 3655 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3656 return (DDI_FAILURE); 3657 } 3658 3659 static int 3660 xdf_suspend(dev_info_t *dip) 3661 { 3662 int instance = ddi_get_instance(dip); 3663 xdf_t *vdp; 3664 3665 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3666 return (DDI_FAILURE); 3667 3668 if (xdf_debug & SUSRES_DBG) 3669 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3670 3671 xvdi_suspend(dip); 3672 3673 mutex_enter(&vdp->xdf_cb_lk); 3674 mutex_enter(&vdp->xdf_dev_lk); 3675 3676 vdp->xdf_suspending = B_TRUE; 3677 xdf_ring_destroy(vdp); 3678 xdf_set_state(vdp, XD_SUSPEND); 3679 vdp->xdf_suspending = B_FALSE; 3680 3681 mutex_exit(&vdp->xdf_dev_lk); 3682 mutex_exit(&vdp->xdf_cb_lk); 3683 3684 if (xdf_debug & SUSRES_DBG) 3685 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3686 3687 return (DDI_SUCCESS); 3688 } 3689 3690 static int 3691 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3692 { 3693 xdf_t *vdp; 3694 int instance; 3695 3696 switch (cmd) { 3697 3698 case DDI_PM_SUSPEND: 3699 break; 3700 3701 case DDI_SUSPEND: 3702 return (xdf_suspend(dip)); 3703 3704 case DDI_DETACH: 3705 break; 3706 3707 default: 3708 return (DDI_FAILURE); 3709 } 3710 3711 instance = ddi_get_instance(dip); 3712 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3713 vdp = ddi_get_soft_state(xdf_ssp, instance); 3714 3715 
if (vdp == NULL) 3716 return (DDI_FAILURE); 3717 3718 mutex_enter(&vdp->xdf_cb_lk); 3719 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3720 if (vdp->xdf_state != XD_CLOSED) { 3721 mutex_exit(&vdp->xdf_cb_lk); 3722 return (DDI_FAILURE); 3723 } 3724 mutex_exit(&vdp->xdf_cb_lk); 3725 3726 ASSERT(!ISDMACBON(vdp)); 3727 3728 #ifdef XPV_HVM_DRIVER 3729 xdf_hvm_rm(dip); 3730 #endif /* XPV_HVM_DRIVER */ 3731 3732 if (vdp->xdf_timeout_id != 0) 3733 (void) untimeout(vdp->xdf_timeout_id); 3734 3735 xvdi_remove_event_handler(dip, XS_OE_STATE); 3736 ddi_taskq_destroy(vdp->xdf_ready_tq); 3737 3738 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3739 cmlb_free_handle(&vdp->xdf_vd_lbl); 3740 3741 /* we'll support backend running in domU later */ 3742 #ifdef DOMU_BACKEND 3743 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3744 #endif 3745 3746 list_destroy(&vdp->xdf_vreq_act); 3747 ddi_prop_remove_all(dip); 3748 xdf_kstat_delete(dip); 3749 ddi_remove_softintr(vdp->xdf_softintr_id); 3750 xvdi_remove_xb_watch_handlers(dip); 3751 ddi_set_driver_private(dip, NULL); 3752 cv_destroy(&vdp->xdf_dev_cv); 3753 mutex_destroy(&vdp->xdf_cb_lk); 3754 mutex_destroy(&vdp->xdf_dev_lk); 3755 if (vdp->xdf_cache_flush_block != NULL) 3756 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize); 3757 ddi_soft_state_free(xdf_ssp, instance); 3758 return (DDI_SUCCESS); 3759 } 3760 3761 /* 3762 * Driver linkage structures. 3763 */ 3764 static struct cb_ops xdf_cbops = { 3765 xdf_open, 3766 xdf_close, 3767 xdf_strategy, 3768 nodev, 3769 xdf_dump, 3770 xdf_read, 3771 xdf_write, 3772 xdf_ioctl, 3773 nodev, 3774 nodev, 3775 nodev, 3776 nochpoll, 3777 xdf_prop_op, 3778 NULL, 3779 D_MP | D_NEW | D_64BIT, 3780 CB_REV, 3781 xdf_aread, 3782 xdf_awrite 3783 }; 3784 3785 struct dev_ops xdf_devops = { 3786 DEVO_REV, /* devo_rev */ 3787 0, /* devo_refcnt */ 3788 xdf_getinfo, /* devo_getinfo */ 3789 nulldev, /* devo_identify */ 3790 nulldev, /* devo_probe */ 3791 xdf_attach, /* devo_attach */ 3792 xdf_detach, /* devo_detach */ 3793 nodev, /* devo_reset */ 3794 &xdf_cbops, /* devo_cb_ops */ 3795 NULL, /* devo_bus_ops */ 3796 NULL, /* devo_power */ 3797 ddi_quiesce_not_supported, /* devo_quiesce */ 3798 }; 3799 3800 /* 3801 * Module linkage structures. 3802 */ 3803 static struct modldrv modldrv = { 3804 &mod_driverops, /* Type of module. 
This one is a driver */ 3805 "virtual block driver", /* short description */ 3806 &xdf_devops /* driver specific ops */ 3807 }; 3808 3809 static struct modlinkage xdf_modlinkage = { 3810 MODREV_1, (void *)&modldrv, NULL 3811 }; 3812 3813 /* 3814 * standard module entry points 3815 */ 3816 int 3817 _init(void) 3818 { 3819 int rc; 3820 3821 xdf_major = ddi_name_to_major("xdf"); 3822 if (xdf_major == (major_t)-1) 3823 return (EINVAL); 3824 3825 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3826 return (rc); 3827 3828 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3829 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3830 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3831 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3832 3833 #ifdef XPV_HVM_DRIVER 3834 xdf_hvm_init(); 3835 #endif /* XPV_HVM_DRIVER */ 3836 3837 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3838 #ifdef XPV_HVM_DRIVER 3839 xdf_hvm_fini(); 3840 #endif /* XPV_HVM_DRIVER */ 3841 kmem_cache_destroy(xdf_vreq_cache); 3842 kmem_cache_destroy(xdf_gs_cache); 3843 ddi_soft_state_fini(&xdf_ssp); 3844 return (rc); 3845 } 3846 3847 return (rc); 3848 } 3849 3850 int 3851 _fini(void) 3852 { 3853 int err; 3854 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3855 return (err); 3856 3857 #ifdef XPV_HVM_DRIVER 3858 xdf_hvm_fini(); 3859 #endif /* XPV_HVM_DRIVER */ 3860 3861 kmem_cache_destroy(xdf_vreq_cache); 3862 kmem_cache_destroy(xdf_gs_cache); 3863 ddi_soft_state_fini(&xdf_ssp); 3864 3865 return (0); 3866 } 3867 3868 int 3869 _info(struct modinfo *modinfop) 3870 { 3871 return (mod_info(&xdf_modlinkage, modinfop)); 3872 } 3873