/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports disk device nodes, accepts IO requests from those
 * nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notification of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
73 */ 74 75 #include <io/xdf.h> 76 77 #include <sys/conf.h> 78 #include <sys/dkio.h> 79 #include <sys/promif.h> 80 #include <sys/sysmacros.h> 81 #include <sys/kstat.h> 82 #include <sys/mach_mmu.h> 83 #ifdef XPV_HVM_DRIVER 84 #include <sys/xpv_support.h> 85 #else /* !XPV_HVM_DRIVER */ 86 #include <sys/evtchn_impl.h> 87 #endif /* !XPV_HVM_DRIVER */ 88 #include <sys/sunndi.h> 89 #include <public/io/xenbus.h> 90 #include <xen/sys/xenbus_impl.h> 91 #include <sys/scsi/generic/inquiry.h> 92 #include <xen/io/blkif_impl.h> 93 #include <sys/fdio.h> 94 #include <sys/cdio.h> 95 96 /* 97 * DEBUG_EVAL can be used to include debug only statements without 98 * having to use '#ifdef DEBUG' statements 99 */ 100 #ifdef DEBUG 101 #define DEBUG_EVAL(x) (x) 102 #else /* !DEBUG */ 103 #define DEBUG_EVAL(x) 104 #endif /* !DEBUG */ 105 106 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ 107 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ 108 #define XDF_STATE_TIMEOUT (30*1000*1000) /* 30.00 sec */ 109 110 #define INVALID_DOMID ((domid_t)-1) 111 #define FLUSH_DISKCACHE 0x1 112 #define WRITE_BARRIER 0x2 113 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 114 #define USE_WRITE_BARRIER(vdp) \ 115 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 116 #define USE_FLUSH_DISKCACHE(vdp) \ 117 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 118 #define IS_WRITE_BARRIER(vdp, bp) \ 119 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 120 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 121 #define IS_FLUSH_DISKCACHE(bp) \ 122 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 123 124 #define VREQ_DONE(vreq) \ 125 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ 126 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ 127 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) 128 129 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) 130 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) 131 132 extern int 
do_polled_io; 133 134 /* run-time tunables that we don't want the compiler to optimize away */ 135 volatile int xdf_debug = 0; 136 volatile boolean_t xdf_barrier_flush_disable = B_FALSE; 137 138 /* per module globals */ 139 major_t xdf_major; 140 static void *xdf_ssp; 141 static kmem_cache_t *xdf_vreq_cache; 142 static kmem_cache_t *xdf_gs_cache; 143 static int xdf_maxphys = XB_MAXPHYS; 144 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 145 static int xdf_fbrewrites; /* flush block re-write count */ 146 147 /* misc public functions */ 148 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 149 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 150 151 /* misc private functions */ 152 static void xdf_io_start(xdf_t *); 153 static void xdf_devid_setup(xdf_t *); 154 155 /* callbacks from commmon label */ 156 static cmlb_tg_ops_t xdf_lb_ops = { 157 TG_DK_OPS_VERSION_1, 158 xdf_lb_rdwr, 159 xdf_lb_getinfo 160 }; 161 162 /* 163 * I/O buffer DMA attributes 164 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 165 */ 166 static ddi_dma_attr_t xb_dma_attr = { 167 DMA_ATTR_V0, 168 (uint64_t)0, /* lowest address */ 169 (uint64_t)0xffffffffffffffff, /* highest usable address */ 170 (uint64_t)0xffffff, /* DMA counter limit max */ 171 (uint64_t)XB_BSIZE, /* alignment in bytes */ 172 XB_BSIZE - 1, /* bitmap of burst sizes */ 173 XB_BSIZE, /* min transfer */ 174 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 175 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 176 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 177 XB_BSIZE, /* granularity */ 178 0, /* flags (reserved) */ 179 }; 180 181 static ddi_device_acc_attr_t xc_acc_attr = { 182 DDI_DEVICE_ATTR_V0, 183 DDI_NEVERSWAP_ACC, 184 DDI_STRICTORDER_ACC 185 }; 186 187 static void 188 xdf_timeout_handler(void *arg) 189 { 190 xdf_t *vdp = arg; 191 192 mutex_enter(&vdp->xdf_dev_lk); 193 vdp->xdf_timeout_id = 0; 194 mutex_exit(&vdp->xdf_dev_lk); 195 196 /* 
new timeout thread could be re-scheduled */ 197 xdf_io_start(vdp); 198 } 199 200 /* 201 * callback func when DMA/GTE resources is available 202 * 203 * Note: we only register one callback function to grant table subsystem 204 * since we only have one 'struct gnttab_free_callback' in xdf_t. 205 */ 206 static int 207 xdf_dmacallback(caddr_t arg) 208 { 209 xdf_t *vdp = (xdf_t *)arg; 210 ASSERT(vdp != NULL); 211 212 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 213 vdp->xdf_addr)); 214 215 ddi_trigger_softintr(vdp->xdf_softintr_id); 216 return (DDI_DMA_CALLBACK_DONE); 217 } 218 219 static ge_slot_t * 220 gs_get(xdf_t *vdp, int isread) 221 { 222 grant_ref_t gh; 223 ge_slot_t *gs; 224 225 /* try to alloc GTEs needed in this slot, first */ 226 if (gnttab_alloc_grant_references( 227 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 228 if (vdp->xdf_gnt_callback.next == NULL) { 229 SETDMACBON(vdp); 230 gnttab_request_free_callback( 231 &vdp->xdf_gnt_callback, 232 (void (*)(void *))xdf_dmacallback, 233 (void *)vdp, 234 BLKIF_MAX_SEGMENTS_PER_REQUEST); 235 } 236 return (NULL); 237 } 238 239 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 240 if (gs == NULL) { 241 gnttab_free_grant_references(gh); 242 if (vdp->xdf_timeout_id == 0) 243 /* restart I/O after one second */ 244 vdp->xdf_timeout_id = 245 timeout(xdf_timeout_handler, vdp, hz); 246 return (NULL); 247 } 248 249 /* init gs_slot */ 250 gs->gs_oeid = vdp->xdf_peer; 251 gs->gs_isread = isread; 252 gs->gs_ghead = gh; 253 gs->gs_ngrefs = 0; 254 255 return (gs); 256 } 257 258 static void 259 gs_free(ge_slot_t *gs) 260 { 261 int i; 262 263 /* release all grant table entry resources used in this slot */ 264 for (i = 0; i < gs->gs_ngrefs; i++) 265 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 266 gnttab_free_grant_references(gs->gs_ghead); 267 list_remove(&gs->gs_vreq->v_gs, gs); 268 kmem_cache_free(xdf_gs_cache, gs); 269 } 270 271 static grant_ref_t 272 gs_grant(ge_slot_t *gs, mfn_t mfn) 273 { 274 grant_ref_t 
gr = gnttab_claim_grant_reference(&gs->gs_ghead); 275 276 ASSERT(gr != -1); 277 ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); 278 gs->gs_ge[gs->gs_ngrefs++] = gr; 279 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 280 281 return (gr); 282 } 283 284 /* 285 * Alloc a vreq for this bp 286 * bp->av_back contains the pointer to the vreq upon return 287 */ 288 static v_req_t * 289 vreq_get(xdf_t *vdp, buf_t *bp) 290 { 291 v_req_t *vreq = NULL; 292 293 ASSERT(BP_VREQ(bp) == NULL); 294 295 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 296 if (vreq == NULL) { 297 if (vdp->xdf_timeout_id == 0) 298 /* restart I/O after one second */ 299 vdp->xdf_timeout_id = 300 timeout(xdf_timeout_handler, vdp, hz); 301 return (NULL); 302 } 303 bzero(vreq, sizeof (v_req_t)); 304 list_create(&vreq->v_gs, sizeof (ge_slot_t), 305 offsetof(ge_slot_t, gs_vreq_link)); 306 vreq->v_buf = bp; 307 vreq->v_status = VREQ_INIT; 308 vreq->v_runq = B_FALSE; 309 BP_VREQ_SET(bp, vreq); 310 /* init of other fields in vreq is up to the caller */ 311 312 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 313 314 return (vreq); 315 } 316 317 static void 318 vreq_free(xdf_t *vdp, v_req_t *vreq) 319 { 320 buf_t *bp = vreq->v_buf; 321 322 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 323 ASSERT(BP_VREQ(bp) == vreq); 324 325 list_remove(&vdp->xdf_vreq_act, vreq); 326 327 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 328 goto done; 329 330 switch (vreq->v_status) { 331 case VREQ_DMAWIN_DONE: 332 case VREQ_GS_ALLOCED: 333 case VREQ_DMABUF_BOUND: 334 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 335 /*FALLTHRU*/ 336 case VREQ_DMAMEM_ALLOCED: 337 if (!ALIGNED_XFER(bp)) { 338 ASSERT(vreq->v_abuf != NULL); 339 if (!IS_ERROR(bp) && IS_READ(bp)) 340 bcopy(vreq->v_abuf, bp->b_un.b_addr, 341 bp->b_bcount); 342 ddi_dma_mem_free(&vreq->v_align); 343 } 344 /*FALLTHRU*/ 345 case VREQ_MEMDMAHDL_ALLOCED: 346 if (!ALIGNED_XFER(bp)) 347 ddi_dma_free_handle(&vreq->v_memdmahdl); 348 /*FALLTHRU*/ 349 
case VREQ_DMAHDL_ALLOCED: 350 ddi_dma_free_handle(&vreq->v_dmahdl); 351 break; 352 default: 353 break; 354 } 355 done: 356 ASSERT(!vreq->v_runq); 357 list_destroy(&vreq->v_gs); 358 kmem_cache_free(xdf_vreq_cache, vreq); 359 } 360 361 /* 362 * Snarf new data if our flush block was re-written 363 */ 364 static void 365 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 366 { 367 int nblks; 368 boolean_t mapin; 369 370 if (IS_WRITE_BARRIER(vdp, bp)) 371 return; /* write was a flush write */ 372 373 mapin = B_FALSE; 374 nblks = bp->b_bcount >> DEV_BSHIFT; 375 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 376 xdf_fbrewrites++; 377 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 378 mapin = B_TRUE; 379 bp_mapin(bp); 380 } 381 bcopy(bp->b_un.b_addr + 382 ((xdf_flush_block - blkno) << DEV_BSHIFT), 383 vdp->xdf_cache_flush_block, DEV_BSIZE); 384 if (mapin) 385 bp_mapout(bp); 386 } 387 } 388 389 /* 390 * Initalize the DMA and grant table resources for the buf 391 */ 392 static int 393 vreq_setup(xdf_t *vdp, v_req_t *vreq) 394 { 395 int rc; 396 ddi_dma_attr_t dmaattr; 397 uint_t ndcs, ndws; 398 ddi_dma_handle_t dh; 399 ddi_dma_handle_t mdh; 400 ddi_dma_cookie_t dc; 401 ddi_acc_handle_t abh; 402 caddr_t aba; 403 ge_slot_t *gs; 404 size_t bufsz; 405 off_t off; 406 size_t sz; 407 buf_t *bp = vreq->v_buf; 408 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 409 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 410 411 switch (vreq->v_status) { 412 case VREQ_INIT: 413 if (IS_FLUSH_DISKCACHE(bp)) { 414 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 415 DPRINTF(DMA_DBG, ("xdf@%s: " 416 "get ge_slotfailed\n", vdp->xdf_addr)); 417 return (DDI_FAILURE); 418 } 419 vreq->v_blkno = 0; 420 vreq->v_nslots = 1; 421 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 422 vreq->v_status = VREQ_GS_ALLOCED; 423 gs->gs_vreq = vreq; 424 list_insert_head(&vreq->v_gs, gs); 425 return (DDI_SUCCESS); 426 } 427 428 if (IS_WRITE_BARRIER(vdp, bp)) 429 vreq->v_flush_diskcache = WRITE_BARRIER; 430 vreq->v_blkno = bp->b_blkno + 431 (diskaddr_t)(uintptr_t)bp->b_private; 432 /* See if we wrote new data to our flush block */ 433 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 434 check_fbwrite(vdp, bp, vreq->v_blkno); 435 vreq->v_status = VREQ_INIT_DONE; 436 /*FALLTHRU*/ 437 438 case VREQ_INIT_DONE: 439 /* 440 * alloc DMA handle 441 */ 442 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 443 xdf_dmacallback, (caddr_t)vdp, &dh); 444 if (rc != DDI_SUCCESS) { 445 SETDMACBON(vdp); 446 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 447 vdp->xdf_addr)); 448 return (DDI_FAILURE); 449 } 450 451 vreq->v_dmahdl = dh; 452 vreq->v_status = VREQ_DMAHDL_ALLOCED; 453 /*FALLTHRU*/ 454 455 case VREQ_DMAHDL_ALLOCED: 456 /* 457 * alloc dma handle for 512-byte aligned buf 458 */ 459 if (!ALIGNED_XFER(bp)) { 460 /* 461 * XXPV: we need to temporarily enlarge the seg 462 * boundary and s/g length to work round CR6381968 463 */ 464 dmaattr = xb_dma_attr; 465 dmaattr.dma_attr_seg = (uint64_t)-1; 466 dmaattr.dma_attr_sgllen = INT_MAX; 467 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 468 xdf_dmacallback, (caddr_t)vdp, &mdh); 469 if (rc != DDI_SUCCESS) { 470 SETDMACBON(vdp); 471 DPRINTF(DMA_DBG, ("xdf@%s: " 472 "unaligned buf DMAhandle alloc failed\n", 473 vdp->xdf_addr)); 474 return (DDI_FAILURE); 475 } 476 vreq->v_memdmahdl = mdh; 477 vreq->v_status 
= VREQ_MEMDMAHDL_ALLOCED; 478 } 479 /*FALLTHRU*/ 480 481 case VREQ_MEMDMAHDL_ALLOCED: 482 /* 483 * alloc 512-byte aligned buf 484 */ 485 if (!ALIGNED_XFER(bp)) { 486 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 487 bp_mapin(bp); 488 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 489 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 490 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 491 &aba, &bufsz, &abh); 492 if (rc != DDI_SUCCESS) { 493 SETDMACBON(vdp); 494 DPRINTF(DMA_DBG, ("xdf@%s: " 495 "DMA mem allocation failed\n", 496 vdp->xdf_addr)); 497 return (DDI_FAILURE); 498 } 499 500 vreq->v_abuf = aba; 501 vreq->v_align = abh; 502 vreq->v_status = VREQ_DMAMEM_ALLOCED; 503 504 ASSERT(bufsz >= bp->b_bcount); 505 if (!IS_READ(bp)) 506 bcopy(bp->b_un.b_addr, vreq->v_abuf, 507 bp->b_bcount); 508 } 509 /*FALLTHRU*/ 510 511 case VREQ_DMAMEM_ALLOCED: 512 /* 513 * dma bind 514 */ 515 if (ALIGNED_XFER(bp)) { 516 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 517 dma_flags, xdf_dmacallback, (caddr_t)vdp, 518 &dc, &ndcs); 519 } else { 520 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 521 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 522 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 523 } 524 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 525 /* get num of dma windows */ 526 if (rc == DDI_DMA_PARTIAL_MAP) { 527 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 528 ASSERT(rc == DDI_SUCCESS); 529 } else { 530 ndws = 1; 531 } 532 } else { 533 SETDMACBON(vdp); 534 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 535 vdp->xdf_addr)); 536 return (DDI_FAILURE); 537 } 538 539 vreq->v_dmac = dc; 540 vreq->v_dmaw = 0; 541 vreq->v_ndmacs = ndcs; 542 vreq->v_ndmaws = ndws; 543 vreq->v_nslots = ndws; 544 vreq->v_status = VREQ_DMABUF_BOUND; 545 /*FALLTHRU*/ 546 547 case VREQ_DMABUF_BOUND: 548 /* 549 * get ge_slot, callback is set upon failure from gs_get(), 550 * if not set previously 551 */ 552 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 553 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 554 
vdp->xdf_addr)); 555 return (DDI_FAILURE); 556 } 557 558 vreq->v_status = VREQ_GS_ALLOCED; 559 gs->gs_vreq = vreq; 560 list_insert_head(&vreq->v_gs, gs); 561 break; 562 563 case VREQ_GS_ALLOCED: 564 /* nothing need to be done */ 565 break; 566 567 case VREQ_DMAWIN_DONE: 568 /* 569 * move to the next dma window 570 */ 571 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 572 573 /* get a ge_slot for this DMA window */ 574 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 575 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 576 vdp->xdf_addr)); 577 return (DDI_FAILURE); 578 } 579 580 vreq->v_dmaw++; 581 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 582 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 583 vreq->v_status = VREQ_GS_ALLOCED; 584 gs->gs_vreq = vreq; 585 list_insert_head(&vreq->v_gs, gs); 586 break; 587 588 default: 589 return (DDI_FAILURE); 590 } 591 592 return (DDI_SUCCESS); 593 } 594 595 static int 596 xdf_cmlb_attach(xdf_t *vdp) 597 { 598 dev_info_t *dip = vdp->xdf_dip; 599 600 return (cmlb_attach(dip, &xdf_lb_ops, 601 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 602 XD_IS_RM(vdp), B_TRUE, 603 XD_IS_CD(vdp) ? 
DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 604 0, vdp->xdf_vd_lbl, NULL)); 605 } 606 607 static void 608 xdf_io_err(buf_t *bp, int err, size_t resid) 609 { 610 bioerror(bp, err); 611 if (resid == 0) 612 bp->b_resid = bp->b_bcount; 613 biodone(bp); 614 } 615 616 static void 617 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 618 { 619 v_req_t *vreq = BP_VREQ(bp); 620 621 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 622 623 if (vdp->xdf_xdev_iostat == NULL) 624 return; 625 if ((vreq != NULL) && vreq->v_runq) { 626 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 627 } else { 628 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 629 } 630 } 631 632 static void 633 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 634 { 635 v_req_t *vreq = BP_VREQ(bp); 636 637 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 638 639 if (vdp->xdf_xdev_iostat == NULL) 640 return; 641 642 if ((vreq != NULL) && vreq->v_runq) { 643 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 644 } else { 645 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 646 } 647 648 if (bp->b_flags & B_READ) { 649 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++; 650 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount; 651 } else if (bp->b_flags & B_WRITE) { 652 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++; 653 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount; 654 } 655 } 656 657 static void 658 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 659 { 660 v_req_t *vreq = BP_VREQ(bp); 661 662 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 663 ASSERT(!vreq->v_runq); 664 665 vreq->v_runq = B_TRUE; 666 if (vdp->xdf_xdev_iostat == NULL) 667 return; 668 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 669 } 670 671 static void 672 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 673 { 674 v_req_t *vreq = BP_VREQ(bp); 675 676 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 677 ASSERT(vreq->v_runq); 678 679 vreq->v_runq = B_FALSE; 680 if (vdp->xdf_xdev_iostat == NULL) 681 return; 682 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 683 } 
684 685 int 686 xdf_kstat_create(dev_info_t *dip) 687 { 688 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 689 kstat_t *kstat; 690 buf_t *bp; 691 692 if ((kstat = kstat_create("xdf", ddi_get_instance(dip), NULL, "disk", 693 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) 694 return (-1); 695 696 /* See comment about locking in xdf_kstat_delete(). */ 697 mutex_enter(&vdp->xdf_iostat_lk); 698 mutex_enter(&vdp->xdf_dev_lk); 699 700 /* only one kstat can exist at a time */ 701 if (vdp->xdf_xdev_iostat != NULL) { 702 mutex_exit(&vdp->xdf_dev_lk); 703 mutex_exit(&vdp->xdf_iostat_lk); 704 kstat_delete(kstat); 705 return (-1); 706 } 707 708 vdp->xdf_xdev_iostat = kstat; 709 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 710 kstat_install(vdp->xdf_xdev_iostat); 711 712 /* 713 * Now that we've created a kstat, we need to update the waitq and 714 * runq counts for the kstat to reflect our current state. 715 * 716 * For a buf_t structure to be on the runq, it must have a ring 717 * buffer slot associated with it. To get a ring buffer slot the 718 * buf must first have a v_req_t and a ge_slot_t associated with it. 719 * Then when it is granted a ring buffer slot, v_runq will be set to 720 * true. 721 * 722 * For a buf_t structure to be on the waitq, it must not be on the 723 * runq. So to find all the buf_t's that should be on waitq, we 724 * walk the active buf list and add any buf_t's which aren't on the 725 * runq to the waitq. 726 */ 727 bp = vdp->xdf_f_act; 728 while (bp != NULL) { 729 xdf_kstat_enter(vdp, bp); 730 bp = bp->av_forw; 731 } 732 if (vdp->xdf_ready_tq_bp != NULL) 733 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 734 735 mutex_exit(&vdp->xdf_dev_lk); 736 mutex_exit(&vdp->xdf_iostat_lk); 737 return (0); 738 } 739 740 void 741 xdf_kstat_delete(dev_info_t *dip) 742 { 743 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 744 kstat_t *kstat; 745 buf_t *bp; 746 747 /* 748 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 
749 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 750 * and the contents of the our kstat. xdf_iostat_lk is used 751 * to protect the allocation and freeing of the actual kstat. 752 * xdf_dev_lk can't be used for this purpose because kstat 753 * readers use it to access the contents of the kstat and 754 * hence it can't be held when calling kstat_delete(). 755 */ 756 mutex_enter(&vdp->xdf_iostat_lk); 757 mutex_enter(&vdp->xdf_dev_lk); 758 759 if (vdp->xdf_xdev_iostat == NULL) { 760 mutex_exit(&vdp->xdf_dev_lk); 761 mutex_exit(&vdp->xdf_iostat_lk); 762 return; 763 } 764 765 /* 766 * We're about to destroy the kstat structures, so it isn't really 767 * necessary to update the runq and waitq counts. But, since this 768 * isn't a hot code path we can afford to be a little pedantic and 769 * go ahead and decrement the runq and waitq kstat counters to zero 770 * before free'ing them. This helps us ensure that we've gotten all 771 * our accounting correct. 772 * 773 * For an explanation of how we determine which buffers go on the 774 * runq vs which go on the waitq, see the comments in 775 * xdf_kstat_create(). 776 */ 777 bp = vdp->xdf_f_act; 778 while (bp != NULL) { 779 xdf_kstat_exit(vdp, bp); 780 bp = bp->av_forw; 781 } 782 if (vdp->xdf_ready_tq_bp != NULL) 783 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp); 784 785 kstat = vdp->xdf_xdev_iostat; 786 vdp->xdf_xdev_iostat = NULL; 787 mutex_exit(&vdp->xdf_dev_lk); 788 kstat_delete(kstat); 789 mutex_exit(&vdp->xdf_iostat_lk); 790 } 791 792 /* 793 * Add an IO requests onto the active queue. 794 * 795 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs 796 * are used to establish a connection to the backend, so they receive 797 * priority over all other IOs. Since xdf_ready_tq_thread only does 798 * synchronous IO, there can only be one xdf_ready_tq_thread request at any 799 * given time and we record the buf associated with that request in 800 * xdf_ready_tq_bp. 
801 */ 802 static void 803 xdf_bp_push(xdf_t *vdp, buf_t *bp) 804 { 805 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 806 ASSERT(bp->av_forw == NULL); 807 808 xdf_kstat_enter(vdp, bp); 809 810 if (curthread == vdp->xdf_ready_tq_thread) { 811 /* new IO requests from the ready thread */ 812 ASSERT(vdp->xdf_ready_tq_bp == NULL); 813 vdp->xdf_ready_tq_bp = bp; 814 return; 815 } 816 817 /* this is normal IO request */ 818 ASSERT(bp != vdp->xdf_ready_tq_bp); 819 820 if (vdp->xdf_f_act == NULL) { 821 /* this is only only IO on the active queue */ 822 ASSERT(vdp->xdf_l_act == NULL); 823 ASSERT(vdp->xdf_i_act == NULL); 824 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; 825 return; 826 } 827 828 /* add this IO to the tail of the active queue */ 829 vdp->xdf_l_act->av_forw = bp; 830 vdp->xdf_l_act = bp; 831 if (vdp->xdf_i_act == NULL) 832 vdp->xdf_i_act = bp; 833 } 834 835 static void 836 xdf_bp_pop(xdf_t *vdp, buf_t *bp) 837 { 838 buf_t *bp_iter; 839 840 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 841 ASSERT(VREQ_DONE(BP_VREQ(bp))); 842 843 if (vdp->xdf_ready_tq_bp == bp) { 844 /* we're done with a ready thread IO request */ 845 ASSERT(bp->av_forw == NULL); 846 vdp->xdf_ready_tq_bp = NULL; 847 return; 848 } 849 850 /* we're done with a normal IO request */ 851 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); 852 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act)); 853 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); 854 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); 855 856 if (bp == vdp->xdf_f_act) { 857 /* This IO was at the head of our active queue. */ 858 vdp->xdf_f_act = bp->av_forw; 859 if (bp == vdp->xdf_l_act) 860 vdp->xdf_l_act = NULL; 861 } else { 862 /* There IO finished before some other pending IOs. 
*/ 863 bp_iter = vdp->xdf_f_act; 864 while (bp != bp_iter->av_forw) { 865 bp_iter = bp_iter->av_forw; 866 ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); 867 ASSERT(bp_iter != vdp->xdf_i_act); 868 } 869 bp_iter->av_forw = bp->av_forw; 870 if (bp == vdp->xdf_l_act) 871 vdp->xdf_l_act = bp_iter; 872 } 873 bp->av_forw = NULL; 874 } 875 876 static buf_t * 877 xdf_bp_next(xdf_t *vdp) 878 { 879 v_req_t *vreq; 880 buf_t *bp; 881 882 if (vdp->xdf_state == XD_CONNECTED) { 883 /* 884 * If we're in the XD_CONNECTED state, we only service IOs 885 * from the xdf_ready_tq_thread thread. 886 */ 887 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 888 return (NULL); 889 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 890 return (bp); 891 return (NULL); 892 } 893 894 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 895 if (vdp->xdf_state != XD_READY) 896 return (NULL); 897 898 ASSERT(vdp->xdf_ready_tq_bp == NULL); 899 for (;;) { 900 if ((bp = vdp->xdf_i_act) == NULL) 901 return (NULL); 902 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 903 return (bp); 904 905 /* advance the active buf index pointer */ 906 vdp->xdf_i_act = bp->av_forw; 907 } 908 } 909 910 static void 911 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 912 { 913 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 914 v_req_t *vreq = gs->gs_vreq; 915 buf_t *bp = vreq->v_buf; 916 917 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 918 ASSERT(BP_VREQ(bp) == vreq); 919 920 gs_free(gs); 921 922 if (bioerr != 0) 923 bioerror(bp, bioerr); 924 ASSERT(vreq->v_nslots > 0); 925 if (--vreq->v_nslots > 0) 926 return; 927 928 /* remove this IO from our active queue */ 929 xdf_bp_pop(vdp, bp); 930 931 ASSERT(vreq->v_runq); 932 xdf_kstat_exit(vdp, bp); 933 vreq->v_runq = B_FALSE; 934 vreq_free(vdp, vreq); 935 936 if (IS_ERROR(bp)) { 937 xdf_io_err(bp, geterror(bp), 0); 938 } else if (bp->b_resid != 0) { 939 /* Partial transfers are an error */ 940 xdf_io_err(bp, EIO, bp->b_resid); 941 } else { 942 biodone(bp); 943 } 944 } 
945 946 /* 947 * xdf interrupt handler 948 */ 949 static uint_t 950 xdf_intr_locked(xdf_t *vdp) 951 { 952 xendev_ring_t *xbr; 953 blkif_response_t *resp; 954 int bioerr; 955 uint64_t id; 956 uint8_t op; 957 uint16_t status; 958 ddi_acc_handle_t acchdl; 959 960 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 961 962 if ((xbr = vdp->xdf_xb_ring) == NULL) 963 return (DDI_INTR_UNCLAIMED); 964 965 acchdl = vdp->xdf_xb_ring_hdl; 966 967 /* 968 * complete all requests which have a response 969 */ 970 while (resp = xvdi_ring_get_response(xbr)) { 971 id = ddi_get64(acchdl, &resp->id); 972 op = ddi_get8(acchdl, &resp->operation); 973 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 974 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 975 op, id, status)); 976 977 if (status != BLKIF_RSP_OKAY) { 978 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 979 vdp->xdf_addr, 980 (op == BLKIF_OP_READ) ? "reading" : "writing")); 981 bioerr = EIO; 982 } else { 983 bioerr = 0; 984 } 985 986 xdf_io_fini(vdp, id, bioerr); 987 } 988 return (DDI_INTR_CLAIMED); 989 } 990 991 /* 992 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 993 * block at a lower pil. 
994 */ 995 static uint_t 996 xdf_intr(caddr_t arg) 997 { 998 xdf_t *vdp = (xdf_t *)arg; 999 int rv; 1000 1001 mutex_enter(&vdp->xdf_dev_lk); 1002 rv = xdf_intr_locked(vdp); 1003 mutex_exit(&vdp->xdf_dev_lk); 1004 1005 if (!do_polled_io) 1006 xdf_io_start(vdp); 1007 1008 return (rv); 1009 } 1010 1011 static void 1012 xdf_ring_push(xdf_t *vdp) 1013 { 1014 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1015 1016 if (vdp->xdf_xb_ring == NULL) 1017 return; 1018 1019 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1020 DPRINTF(IO_DBG, ( 1021 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1022 vdp->xdf_addr)); 1023 } 1024 1025 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1026 xvdi_notify_oe(vdp->xdf_dip); 1027 } 1028 1029 static int 1030 xdf_ring_drain_locked(xdf_t *vdp) 1031 { 1032 int pollc, rv = 0; 1033 1034 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1035 1036 if (xdf_debug & SUSRES_DBG) 1037 xen_printf("xdf_ring_drain: start\n"); 1038 1039 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1040 if (vdp->xdf_xb_ring == NULL) 1041 goto out; 1042 1043 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1044 (void) xdf_intr_locked(vdp); 1045 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1046 goto out; 1047 xdf_ring_push(vdp); 1048 1049 /* file-backed devices can be slow */ 1050 mutex_exit(&vdp->xdf_dev_lk); 1051 #ifdef XPV_HVM_DRIVER 1052 (void) HYPERVISOR_yield(); 1053 #endif /* XPV_HVM_DRIVER */ 1054 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1055 mutex_enter(&vdp->xdf_dev_lk); 1056 } 1057 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1058 1059 out: 1060 if (vdp->xdf_xb_ring != NULL) { 1061 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1062 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1063 rv = EIO; 1064 } 1065 if (xdf_debug & SUSRES_DBG) 1066 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1067 vdp->xdf_addr, rv); 1068 return (rv); 1069 } 1070 1071 static int 1072 xdf_ring_drain(xdf_t *vdp) 1073 { 1074 
int rv; 1075 mutex_enter(&vdp->xdf_dev_lk); 1076 rv = xdf_ring_drain_locked(vdp); 1077 mutex_exit(&vdp->xdf_dev_lk); 1078 return (rv); 1079 } 1080 1081 /* 1082 * Destroy all v_req_t, grant table entries, and our ring buffer. 1083 */ 1084 static void 1085 xdf_ring_destroy(xdf_t *vdp) 1086 { 1087 v_req_t *vreq; 1088 buf_t *bp; 1089 ge_slot_t *gs; 1090 1091 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1092 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1093 1094 if ((vdp->xdf_state != XD_INIT) && 1095 (vdp->xdf_state != XD_CONNECTED) && 1096 (vdp->xdf_state != XD_READY)) { 1097 ASSERT(vdp->xdf_xb_ring == NULL); 1098 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1099 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1100 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1101 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1102 return; 1103 } 1104 1105 /* 1106 * We don't want to receive async notifications from the backend 1107 * when it finishes processing ring entries. 1108 */ 1109 #ifdef XPV_HVM_DRIVER 1110 ec_unbind_evtchn(vdp->xdf_evtchn); 1111 #else /* !XPV_HVM_DRIVER */ 1112 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1113 #endif /* !XPV_HVM_DRIVER */ 1114 1115 /* 1116 * Drain any requests in the ring. We need to do this before we 1117 * can free grant table entries, because if active ring entries 1118 * point to grants, then the backend could be trying to access 1119 * those grants. 1120 */ 1121 (void) xdf_ring_drain_locked(vdp); 1122 1123 /* We're done talking to the backend so free up our event channel */ 1124 xvdi_free_evtchn(vdp->xdf_dip); 1125 vdp->xdf_evtchn = INVALID_EVTCHN; 1126 1127 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1128 bp = vreq->v_buf; 1129 ASSERT(BP_VREQ(bp) == vreq); 1130 1131 /* Free up any grant table entries associaed with this IO */ 1132 while ((gs = list_head(&vreq->v_gs)) != NULL) 1133 gs_free(gs); 1134 1135 /* If this IO was on the runq, move it back to the waitq. 
*/ 1136 if (vreq->v_runq) 1137 xdf_kstat_runq_to_waitq(vdp, bp); 1138 1139 /* 1140 * Reset any buf IO state since we're going to re-issue the 1141 * IO when we reconnect. 1142 */ 1143 vreq_free(vdp, vreq); 1144 BP_VREQ_SET(bp, NULL); 1145 bioerror(bp, 0); 1146 } 1147 1148 /* reset the active queue index pointer */ 1149 vdp->xdf_i_act = vdp->xdf_f_act; 1150 1151 /* Destroy the ring */ 1152 xvdi_free_ring(vdp->xdf_xb_ring); 1153 vdp->xdf_xb_ring = NULL; 1154 vdp->xdf_xb_ring_hdl = NULL; 1155 vdp->xdf_peer = INVALID_DOMID; 1156 } 1157 1158 void 1159 xdfmin(struct buf *bp) 1160 { 1161 if (bp->b_bcount > xdf_maxphys) 1162 bp->b_bcount = xdf_maxphys; 1163 } 1164 1165 /* 1166 * Check if we have a pending "eject" media request. 1167 */ 1168 static int 1169 xdf_eject_pending(xdf_t *vdp) 1170 { 1171 dev_info_t *dip = vdp->xdf_dip; 1172 char *xsname, *str; 1173 1174 if (!vdp->xdf_media_req_supported) 1175 return (B_FALSE); 1176 1177 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1178 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1179 return (B_FALSE); 1180 1181 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1182 strfree(str); 1183 return (B_FALSE); 1184 } 1185 strfree(str); 1186 return (B_TRUE); 1187 } 1188 1189 /* 1190 * Generate a media request. 1191 */ 1192 static int 1193 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1194 { 1195 dev_info_t *dip = vdp->xdf_dip; 1196 char *xsname; 1197 1198 /* 1199 * we can't be holding xdf_dev_lk because xenbus_printf() can 1200 * block while waiting for a PIL 1 interrupt message. this 1201 * would cause a deadlock with xdf_intr() which needs to grab 1202 * xdf_dev_lk as well and runs at PIL 5. 
1203 */ 1204 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1205 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1206 1207 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1208 return (ENXIO); 1209 1210 /* Check if we support media requests */ 1211 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1212 return (ENOTTY); 1213 1214 /* If an eject is pending then don't allow any new requests */ 1215 if (xdf_eject_pending(vdp)) 1216 return (ENXIO); 1217 1218 /* Make sure that there is media present */ 1219 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1220 return (ENXIO); 1221 1222 /* We only allow operations when the device is ready and connected */ 1223 if (vdp->xdf_state != XD_READY) 1224 return (EIO); 1225 1226 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1227 return (EIO); 1228 1229 return (0); 1230 } 1231 1232 /* 1233 * populate a single blkif_request_t w/ a buf 1234 */ 1235 static void 1236 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1237 { 1238 grant_ref_t gr; 1239 uint8_t fsect, lsect; 1240 size_t bcnt; 1241 paddr_t dma_addr; 1242 off_t blk_off; 1243 dev_info_t *dip = vdp->xdf_dip; 1244 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1245 v_req_t *vreq = BP_VREQ(bp); 1246 uint64_t blkno = vreq->v_blkno; 1247 uint_t ndmacs = vreq->v_ndmacs; 1248 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1249 int seg = 0; 1250 int isread = IS_READ(bp); 1251 ge_slot_t *gs = list_head(&vreq->v_gs); 1252 1253 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1254 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1255 1256 if (isread) 1257 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1258 else { 1259 switch (vreq->v_flush_diskcache) { 1260 case FLUSH_DISKCACHE: 1261 ddi_put8(acchdl, &rreq->operation, 1262 BLKIF_OP_FLUSH_DISKCACHE); 1263 ddi_put16(acchdl, &rreq->handle, vdev); 1264 ddi_put64(acchdl, &rreq->id, 1265 (uint64_t)(uintptr_t)(gs)); 1266 ddi_put8(acchdl, &rreq->nr_segments, 0); 1267 vreq->v_status = VREQ_DMAWIN_DONE; 1268 return; 1269 case 
WRITE_BARRIER: 1270 ddi_put8(acchdl, &rreq->operation, 1271 BLKIF_OP_WRITE_BARRIER); 1272 break; 1273 default: 1274 if (!vdp->xdf_wce) 1275 ddi_put8(acchdl, &rreq->operation, 1276 BLKIF_OP_WRITE_BARRIER); 1277 else 1278 ddi_put8(acchdl, &rreq->operation, 1279 BLKIF_OP_WRITE); 1280 break; 1281 } 1282 } 1283 1284 ddi_put16(acchdl, &rreq->handle, vdev); 1285 ddi_put64(acchdl, &rreq->sector_number, blkno); 1286 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1287 1288 /* 1289 * loop until all segments are populated or no more dma cookie in buf 1290 */ 1291 for (;;) { 1292 /* 1293 * Each segment of a blkif request can transfer up to 1294 * one 4K page of data. 1295 */ 1296 bcnt = vreq->v_dmac.dmac_size; 1297 dma_addr = vreq->v_dmac.dmac_laddress; 1298 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); 1299 fsect = blk_off >> XB_BSHIFT; 1300 lsect = fsect + (bcnt >> XB_BSHIFT) - 1; 1301 1302 ASSERT(bcnt <= PAGESIZE); 1303 ASSERT((bcnt % XB_BSIZE) == 0); 1304 ASSERT((blk_off & XB_BMASK) == 0); 1305 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && 1306 lsect < XB_MAX_SEGLEN / XB_BSIZE); 1307 1308 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); 1309 ddi_put32(acchdl, &rreq->seg[seg].gref, gr); 1310 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); 1311 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); 1312 1313 DPRINTF(IO_DBG, ( 1314 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", 1315 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); 1316 DPRINTF(IO_DBG, ( 1317 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", 1318 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); 1319 1320 blkno += (bcnt >> XB_BSHIFT); 1321 seg++; 1322 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1323 if (--ndmacs) { 1324 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac); 1325 continue; 1326 } 1327 1328 vreq->v_status = VREQ_DMAWIN_DONE; 1329 vreq->v_blkno = blkno; 1330 break; 1331 } 1332 ddi_put8(acchdl, &rreq->nr_segments, seg); 1333 DPRINTF(IO_DBG, ( 1334 "xdf@%s: xdf_process_rreq: 
request id=%"PRIx64" ready\n", 1335 vdp->xdf_addr, rreq->id)); 1336 } 1337 1338 static void 1339 xdf_io_start(xdf_t *vdp) 1340 { 1341 struct buf *bp; 1342 v_req_t *vreq; 1343 blkif_request_t *rreq; 1344 boolean_t rreqready = B_FALSE; 1345 1346 mutex_enter(&vdp->xdf_dev_lk); 1347 1348 /* 1349 * Populate the ring request(s). Loop until there is no buf to 1350 * transfer or no free slot available in I/O ring. 1351 */ 1352 for (;;) { 1353 /* don't start any new IO if we're suspending */ 1354 if (vdp->xdf_suspending) 1355 break; 1356 if ((bp = xdf_bp_next(vdp)) == NULL) 1357 break; 1358 1359 /* if the buf doesn't already have a vreq, allocate one */ 1360 if (((vreq = BP_VREQ(bp)) == NULL) && 1361 ((vreq = vreq_get(vdp, bp)) == NULL)) 1362 break; 1363 1364 /* alloc DMA/GTE resources */ 1365 if (vreq_setup(vdp, vreq) != DDI_SUCCESS) 1366 break; 1367 1368 /* get next blkif_request in the ring */ 1369 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) 1370 break; 1371 bzero(rreq, sizeof (blkif_request_t)); 1372 rreqready = B_TRUE; 1373 1374 /* populate blkif_request with this buf */ 1375 xdf_process_rreq(vdp, bp, rreq); 1376 1377 /* 1378 * This buffer/vreq pair is has been allocated a ring buffer 1379 * resources, so if it isn't already in our runq, add it. 
1380 */ 1381 if (!vreq->v_runq) 1382 xdf_kstat_waitq_to_runq(vdp, bp); 1383 } 1384 1385 /* Send the request(s) to the backend */ 1386 if (rreqready) 1387 xdf_ring_push(vdp); 1388 1389 mutex_exit(&vdp->xdf_dev_lk); 1390 } 1391 1392 1393 /* check if partition is open, -1 - check all partitions on the disk */ 1394 static boolean_t 1395 xdf_isopen(xdf_t *vdp, int partition) 1396 { 1397 int i; 1398 ulong_t parbit; 1399 boolean_t rval = B_FALSE; 1400 1401 ASSERT((partition == -1) || 1402 ((partition >= 0) || (partition < XDF_PEXT))); 1403 1404 if (partition == -1) 1405 parbit = (ulong_t)-1; 1406 else 1407 parbit = 1 << partition; 1408 1409 for (i = 0; i < OTYPCNT; i++) { 1410 if (vdp->xdf_vd_open[i] & parbit) 1411 rval = B_TRUE; 1412 } 1413 1414 return (rval); 1415 } 1416 1417 /* 1418 * The connection should never be closed as long as someone is holding 1419 * us open, there is pending IO, or someone is waiting waiting for a 1420 * connection. 1421 */ 1422 static boolean_t 1423 xdf_busy(xdf_t *vdp) 1424 { 1425 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1426 1427 if ((vdp->xdf_xb_ring != NULL) && 1428 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { 1429 ASSERT(vdp->xdf_state != XD_CLOSED); 1430 return (B_TRUE); 1431 } 1432 1433 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { 1434 ASSERT(vdp->xdf_state != XD_CLOSED); 1435 return (B_TRUE); 1436 } 1437 1438 if (xdf_isopen(vdp, -1)) { 1439 ASSERT(vdp->xdf_state != XD_CLOSED); 1440 return (B_TRUE); 1441 } 1442 1443 if (vdp->xdf_connect_req > 0) { 1444 ASSERT(vdp->xdf_state != XD_CLOSED); 1445 return (B_TRUE); 1446 } 1447 1448 return (B_FALSE); 1449 } 1450 1451 static void 1452 xdf_set_state(xdf_t *vdp, xdf_state_t new_state) 1453 { 1454 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1455 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1456 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", 1457 vdp->xdf_addr, vdp->xdf_state, new_state)); 1458 vdp->xdf_state = new_state; 1459 cv_broadcast(&vdp->xdf_dev_cv); 1460 } 1461 
1462 static void 1463 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) 1464 { 1465 dev_info_t *dip = vdp->xdf_dip; 1466 boolean_t busy; 1467 1468 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1469 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1470 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); 1471 1472 /* Check if we're already there. */ 1473 if (vdp->xdf_state == new_state) 1474 return; 1475 1476 mutex_enter(&vdp->xdf_dev_lk); 1477 busy = xdf_busy(vdp); 1478 1479 /* If we're already closed then there's nothing todo. */ 1480 if (vdp->xdf_state == XD_CLOSED) { 1481 ASSERT(!busy); 1482 xdf_set_state(vdp, new_state); 1483 mutex_exit(&vdp->xdf_dev_lk); 1484 return; 1485 } 1486 1487 #ifdef DEBUG 1488 /* UhOh. Warn the user that something bad has happened. */ 1489 if (!quiet && busy && (vdp->xdf_state == XD_READY) && 1490 (vdp->xdf_xdev_nblocks != 0)) { 1491 cmn_err(CE_WARN, "xdf@%s: disconnected while in use", 1492 vdp->xdf_addr); 1493 } 1494 #endif /* DEBUG */ 1495 1496 xdf_ring_destroy(vdp); 1497 1498 /* If we're busy then we can only go into the unknown state */ 1499 xdf_set_state(vdp, (busy) ? 
XD_UNKNOWN : new_state); 1500 mutex_exit(&vdp->xdf_dev_lk); 1501 1502 /* if we're closed now, let the other end know */ 1503 if (vdp->xdf_state == XD_CLOSED) 1504 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1505 } 1506 1507 1508 /* 1509 * Kick-off connect process 1510 * Status should be XD_UNKNOWN or XD_CLOSED 1511 * On success, status will be changed to XD_INIT 1512 * On error, it will be changed to XD_UNKNOWN 1513 */ 1514 static int 1515 xdf_setstate_init(xdf_t *vdp) 1516 { 1517 dev_info_t *dip = vdp->xdf_dip; 1518 xenbus_transaction_t xbt; 1519 grant_ref_t gref; 1520 char *xsname, *str; 1521 int rv; 1522 1523 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1524 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1525 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1526 (vdp->xdf_state == XD_CLOSED)); 1527 1528 DPRINTF(DDI_DBG, 1529 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1530 1531 /* 1532 * If an eject is pending then don't allow a new connection. 1533 * (Only the backend can clear media request eject request.) 1534 */ 1535 if (xdf_eject_pending(vdp)) 1536 return (DDI_FAILURE); 1537 1538 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1539 goto errout; 1540 1541 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1542 goto errout; 1543 1544 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1545 1546 /* 1547 * Sanity check for the existance of the xenbus device-type property. 1548 * This property might not exist if our xenbus device nodes were 1549 * force destroyed while we were still connected to the backend. 
1550 */ 1551 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1552 goto errout; 1553 strfree(str); 1554 1555 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1556 goto errout; 1557 1558 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1559 #ifdef XPV_HVM_DRIVER 1560 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1561 #else /* !XPV_HVM_DRIVER */ 1562 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1563 DDI_SUCCESS) { 1564 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1565 "failed to add intr handler", vdp->xdf_addr); 1566 goto errout1; 1567 } 1568 #endif /* !XPV_HVM_DRIVER */ 1569 1570 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1571 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1572 DDI_SUCCESS) { 1573 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1574 vdp->xdf_addr); 1575 goto errout2; 1576 } 1577 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1578 1579 /* 1580 * Write into xenstore the info needed by backend 1581 */ 1582 trans_retry: 1583 if (xenbus_transaction_start(&xbt)) { 1584 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1585 vdp->xdf_addr); 1586 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1587 goto fail_trans; 1588 } 1589 1590 /* 1591 * XBP_PROTOCOL is written by the domain builder in the case of PV 1592 * domains. However, it is not written for HVM domains, so let's 1593 * write it here. 
1594 */ 1595 if (((rv = xenbus_printf(xbt, xsname, 1596 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1597 ((rv = xenbus_printf(xbt, xsname, 1598 XBP_RING_REF, "%u", gref)) != 0) || 1599 ((rv = xenbus_printf(xbt, xsname, 1600 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1601 ((rv = xenbus_printf(xbt, xsname, 1602 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1603 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1604 (void) xenbus_transaction_end(xbt, 1); 1605 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1606 goto fail_trans; 1607 } 1608 1609 /* kick-off connect process */ 1610 if (rv = xenbus_transaction_end(xbt, 0)) { 1611 if (rv == EAGAIN) 1612 goto trans_retry; 1613 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1614 goto fail_trans; 1615 } 1616 1617 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1618 mutex_enter(&vdp->xdf_dev_lk); 1619 xdf_set_state(vdp, XD_INIT); 1620 mutex_exit(&vdp->xdf_dev_lk); 1621 1622 return (DDI_SUCCESS); 1623 1624 fail_trans: 1625 xvdi_free_ring(vdp->xdf_xb_ring); 1626 errout2: 1627 #ifdef XPV_HVM_DRIVER 1628 ec_unbind_evtchn(vdp->xdf_evtchn); 1629 #else /* !XPV_HVM_DRIVER */ 1630 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1631 #endif /* !XPV_HVM_DRIVER */ 1632 errout1: 1633 xvdi_free_evtchn(dip); 1634 vdp->xdf_evtchn = INVALID_EVTCHN; 1635 errout: 1636 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1637 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1638 vdp->xdf_addr); 1639 return (DDI_FAILURE); 1640 } 1641 1642 int 1643 xdf_get_flush_block(xdf_t *vdp) 1644 { 1645 /* 1646 * Get a DEV_BSIZE aligned bufer 1647 */ 1648 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1649 vdp->xdf_cache_flush_block = 1650 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1651 (int)vdp->xdf_xdev_secsize); 1652 1653 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1654 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1655 return (DDI_FAILURE); 1656 
return (DDI_SUCCESS); 1657 } 1658 1659 static void 1660 xdf_setstate_ready(void *arg) 1661 { 1662 xdf_t *vdp = (xdf_t *)arg; 1663 dev_info_t *dip = vdp->xdf_dip; 1664 1665 vdp->xdf_ready_tq_thread = curthread; 1666 1667 /* Create minor nodes now when we are almost connected */ 1668 mutex_enter(&vdp->xdf_dev_lk); 1669 if (vdp->xdf_cmlb_reattach) { 1670 vdp->xdf_cmlb_reattach = B_FALSE; 1671 mutex_exit(&vdp->xdf_dev_lk); 1672 if (xdf_cmlb_attach(vdp) != 0) { 1673 cmn_err(CE_WARN, 1674 "xdf@%s: cmlb attach failed", 1675 ddi_get_name_addr(dip)); 1676 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1677 return; 1678 } 1679 mutex_enter(&vdp->xdf_dev_lk); 1680 } 1681 1682 /* If we're not still trying to get to the ready state, then bail. */ 1683 if (vdp->xdf_state != XD_CONNECTED) { 1684 mutex_exit(&vdp->xdf_dev_lk); 1685 return; 1686 } 1687 mutex_exit(&vdp->xdf_dev_lk); 1688 1689 /* 1690 * If backend has feature-barrier, see if it supports disk 1691 * cache flush op. 1692 */ 1693 vdp->xdf_flush_supported = B_FALSE; 1694 if (vdp->xdf_feature_barrier) { 1695 /* 1696 * Pretend we already know flush is supported so probe 1697 * will attempt the correct op. 1698 */ 1699 vdp->xdf_flush_supported = B_TRUE; 1700 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1701 vdp->xdf_flush_supported = B_TRUE; 1702 } else { 1703 vdp->xdf_flush_supported = B_FALSE; 1704 /* 1705 * If the other end does not support the cache flush op 1706 * then we must use a barrier-write to force disk 1707 * cache flushing. Barrier writes require that a data 1708 * block actually be written. 1709 * Cache a block to barrier-write when we are 1710 * asked to perform a flush. 1711 * XXX - would it be better to just copy 1 block 1712 * (512 bytes) from whatever write we did last 1713 * and rewrite that block? 
1714 */ 1715 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1716 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1717 return; 1718 } 1719 } 1720 } 1721 1722 mutex_enter(&vdp->xdf_cb_lk); 1723 mutex_enter(&vdp->xdf_dev_lk); 1724 if (vdp->xdf_state == XD_CONNECTED) 1725 xdf_set_state(vdp, XD_READY); 1726 mutex_exit(&vdp->xdf_dev_lk); 1727 1728 /* Restart any currently queued up io */ 1729 xdf_io_start(vdp); 1730 1731 mutex_exit(&vdp->xdf_cb_lk); 1732 } 1733 1734 /* 1735 * synthetic geometry 1736 */ 1737 #define XDF_NSECTS 256 1738 #define XDF_NHEADS 16 1739 1740 static void 1741 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1742 { 1743 xdf_t *vdp; 1744 uint_t ncyl; 1745 1746 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1747 1748 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1749 1750 bzero(geomp, sizeof (*geomp)); 1751 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1752 geomp->g_acyl = 0; 1753 geomp->g_nhead = XDF_NHEADS; 1754 geomp->g_nsect = XDF_NSECTS; 1755 geomp->g_secsize = vdp->xdf_xdev_secsize; 1756 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1757 geomp->g_intrlv = 0; 1758 geomp->g_rpm = 7200; 1759 } 1760 1761 /* 1762 * Finish other initialization after we've connected to backend 1763 * Status should be XD_INIT before calling this routine 1764 * On success, status should be changed to XD_CONNECTED. 
1765 * On error, status should stay XD_INIT 1766 */ 1767 static int 1768 xdf_setstate_connected(xdf_t *vdp) 1769 { 1770 dev_info_t *dip = vdp->xdf_dip; 1771 cmlb_geom_t pgeom; 1772 diskaddr_t nblocks = 0; 1773 uint_t secsize = 0; 1774 char *oename, *xsname, *str; 1775 uint_t dinfo; 1776 1777 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1778 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1779 ASSERT(vdp->xdf_state == XD_INIT); 1780 1781 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1782 ((oename = xvdi_get_oename(dip)) == NULL)) 1783 return (DDI_FAILURE); 1784 1785 /* Make sure the other end is XenbusStateConnected */ 1786 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1787 return (DDI_FAILURE); 1788 1789 /* Determine if feature barrier is supported by backend */ 1790 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1791 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1792 vdp->xdf_addr); 1793 1794 /* 1795 * Probe backend. Read the device size into xdf_xdev_nblocks 1796 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1797 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1798 * we always set VDISK_CDROM, regardless of if it's present in 1799 * the xenbus info parameter. 
1800 */ 1801 if (xenbus_gather(XBT_NULL, oename, 1802 XBP_SECTORS, "%"SCNu64, &nblocks, 1803 XBP_SECTOR_SIZE, "%u", &secsize, 1804 XBP_INFO, "%u", &dinfo, 1805 NULL) != 0) { 1806 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1807 "cannot read backend info", vdp->xdf_addr); 1808 return (DDI_FAILURE); 1809 } 1810 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1811 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1812 vdp->xdf_addr); 1813 return (DDI_FAILURE); 1814 } 1815 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1816 dinfo |= VDISK_CDROM; 1817 strfree(str); 1818 1819 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1820 secsize = DEV_BSIZE; 1821 vdp->xdf_xdev_nblocks = nblocks; 1822 vdp->xdf_xdev_secsize = secsize; 1823 #ifdef _ILP32 1824 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1825 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1826 "backend disk device too large with %llu blocks for" 1827 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1828 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1829 return (DDI_FAILURE); 1830 } 1831 #endif 1832 1833 /* 1834 * If the physical geometry for a fixed disk has been explicity 1835 * set then make sure that the specified physical geometry isn't 1836 * larger than the device we connected to. 
1837 */ 1838 if (vdp->xdf_pgeom_fixed && 1839 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1840 cmn_err(CE_WARN, 1841 "xdf@%s: connect failed, fixed geometry too large", 1842 vdp->xdf_addr); 1843 return (DDI_FAILURE); 1844 } 1845 1846 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1847 1848 /* mark vbd is ready for I/O */ 1849 mutex_enter(&vdp->xdf_dev_lk); 1850 xdf_set_state(vdp, XD_CONNECTED); 1851 1852 /* check if the cmlb label should be updated */ 1853 xdf_synthetic_pgeom(dip, &pgeom); 1854 if ((vdp->xdf_dinfo != dinfo) || 1855 (!vdp->xdf_pgeom_fixed && 1856 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1857 vdp->xdf_cmlb_reattach = B_TRUE; 1858 1859 vdp->xdf_dinfo = dinfo; 1860 if (!vdp->xdf_pgeom_fixed) 1861 vdp->xdf_pgeom = pgeom; 1862 } 1863 1864 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1865 if (vdp->xdf_xdev_nblocks == 0) { 1866 vdp->xdf_mstate = DKIO_EJECTED; 1867 cv_broadcast(&vdp->xdf_mstate_cv); 1868 } else { 1869 vdp->xdf_mstate = DKIO_INSERTED; 1870 cv_broadcast(&vdp->xdf_mstate_cv); 1871 } 1872 } else { 1873 if (vdp->xdf_mstate != DKIO_NONE) { 1874 vdp->xdf_mstate = DKIO_NONE; 1875 cv_broadcast(&vdp->xdf_mstate_cv); 1876 } 1877 } 1878 1879 mutex_exit(&vdp->xdf_dev_lk); 1880 1881 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1882 (uint64_t)vdp->xdf_xdev_nblocks); 1883 1884 /* Restart any currently queued up io */ 1885 xdf_io_start(vdp); 1886 1887 /* 1888 * To get to the ready state we have to do IO to the backend device, 1889 * but we can't initiate IO from the other end change callback thread 1890 * (which is the current context we're executing in.) This is because 1891 * if the other end disconnects while we're doing IO from the callback 1892 * thread, then we can't receive that disconnect event and we hang 1893 * waiting for an IO that can never complete. 
1894 */ 1895 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1896 DDI_SLEEP); 1897 1898 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1899 return (DDI_SUCCESS); 1900 } 1901 1902 /*ARGSUSED*/ 1903 static void 1904 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1905 { 1906 XenbusState new_state = *(XenbusState *)impl_data; 1907 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1908 1909 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1910 vdp->xdf_addr, new_state)); 1911 1912 mutex_enter(&vdp->xdf_cb_lk); 1913 1914 /* We assume that this callback is single threaded */ 1915 ASSERT(vdp->xdf_oe_change_thread == NULL); 1916 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1917 1918 /* ignore any backend state changes if we're suspending/suspended */ 1919 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1920 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1921 mutex_exit(&vdp->xdf_cb_lk); 1922 return; 1923 } 1924 1925 switch (new_state) { 1926 case XenbusStateUnknown: 1927 case XenbusStateInitialising: 1928 case XenbusStateInitWait: 1929 case XenbusStateInitialised: 1930 if (vdp->xdf_state == XD_INIT) 1931 break; 1932 1933 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1934 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1935 break; 1936 ASSERT(vdp->xdf_state == XD_INIT); 1937 break; 1938 1939 case XenbusStateConnected: 1940 if ((vdp->xdf_state == XD_CONNECTED) || 1941 (vdp->xdf_state == XD_READY)) 1942 break; 1943 1944 if (vdp->xdf_state != XD_INIT) { 1945 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1946 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1947 break; 1948 ASSERT(vdp->xdf_state == XD_INIT); 1949 } 1950 1951 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1952 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1953 break; 1954 } 1955 ASSERT(vdp->xdf_state == XD_CONNECTED); 1956 break; 1957 1958 case XenbusStateClosing: 1959 if (xdf_isopen(vdp, -1)) { 1960 cmn_err(CE_NOTE, 1961 "xdf@%s: 
hot-unplug failed, still in use", 1962 vdp->xdf_addr); 1963 break; 1964 } 1965 /*FALLTHROUGH*/ 1966 case XenbusStateClosed: 1967 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1968 break; 1969 } 1970 1971 /* notify anybody waiting for oe state change */ 1972 cv_broadcast(&vdp->xdf_dev_cv); 1973 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1974 mutex_exit(&vdp->xdf_cb_lk); 1975 } 1976 1977 static int 1978 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1979 { 1980 int rv, timeouts = 0, reset = 20; 1981 1982 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1983 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1984 1985 /* we can't connect once we're in the closed state */ 1986 if (vdp->xdf_state == XD_CLOSED) 1987 return (XD_CLOSED); 1988 1989 vdp->xdf_connect_req++; 1990 while (vdp->xdf_state != XD_READY) { 1991 mutex_exit(&vdp->xdf_dev_lk); 1992 1993 /* only one thread at a time can be the connection thread */ 1994 if (vdp->xdf_connect_thread == NULL) 1995 vdp->xdf_connect_thread = curthread; 1996 1997 if (vdp->xdf_connect_thread == curthread) { 1998 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 1999 /* 2000 * If we haven't establised a connection 2001 * within the reset time, then disconnect 2002 * so we can try again, and double the reset 2003 * time. The reset time starts at 2 sec. 
2004 */ 2005 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2006 reset *= 2; 2007 } 2008 if (vdp->xdf_state == XD_UNKNOWN) 2009 (void) xdf_setstate_init(vdp); 2010 if (vdp->xdf_state == XD_INIT) 2011 (void) xdf_setstate_connected(vdp); 2012 } 2013 2014 mutex_enter(&vdp->xdf_dev_lk); 2015 if (!wait || (vdp->xdf_state == XD_READY)) 2016 goto out; 2017 2018 mutex_exit((&vdp->xdf_cb_lk)); 2019 if (vdp->xdf_connect_thread != curthread) { 2020 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2021 } else { 2022 /* delay for 0.1 sec */ 2023 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv, 2024 &vdp->xdf_dev_lk, drv_usectohz(100*1000), 2025 TR_CLOCK_TICK); 2026 if (rv == -1) 2027 timeouts++; 2028 } 2029 mutex_exit((&vdp->xdf_dev_lk)); 2030 mutex_enter((&vdp->xdf_cb_lk)); 2031 mutex_enter((&vdp->xdf_dev_lk)); 2032 if (rv == 0) 2033 goto out; 2034 } 2035 2036 out: 2037 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2038 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2039 2040 if (vdp->xdf_connect_thread == curthread) { 2041 /* 2042 * wake up someone else so they can become the connection 2043 * thread. 
2044 */ 2045 cv_signal(&vdp->xdf_dev_cv); 2046 vdp->xdf_connect_thread = NULL; 2047 } 2048 2049 /* Try to lock the media */ 2050 mutex_exit((&vdp->xdf_dev_lk)); 2051 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2052 mutex_enter((&vdp->xdf_dev_lk)); 2053 2054 vdp->xdf_connect_req--; 2055 return (vdp->xdf_state); 2056 } 2057 2058 static uint_t 2059 xdf_iorestart(caddr_t arg) 2060 { 2061 xdf_t *vdp = (xdf_t *)arg; 2062 2063 ASSERT(vdp != NULL); 2064 2065 mutex_enter(&vdp->xdf_dev_lk); 2066 ASSERT(ISDMACBON(vdp)); 2067 SETDMACBOFF(vdp); 2068 mutex_exit(&vdp->xdf_dev_lk); 2069 2070 xdf_io_start(vdp); 2071 2072 return (DDI_INTR_CLAIMED); 2073 } 2074 2075 #ifdef XPV_HVM_DRIVER 2076 2077 typedef struct xdf_hvm_entry { 2078 list_node_t xdf_he_list; 2079 char *xdf_he_path; 2080 dev_info_t *xdf_he_dip; 2081 } xdf_hvm_entry_t; 2082 2083 static list_t xdf_hvm_list; 2084 static kmutex_t xdf_hvm_list_lock; 2085 2086 static xdf_hvm_entry_t * 2087 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2088 { 2089 xdf_hvm_entry_t *i; 2090 2091 ASSERT((path != NULL) || (dip != NULL)); 2092 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2093 2094 i = list_head(&xdf_hvm_list); 2095 while (i != NULL) { 2096 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2097 i = list_next(&xdf_hvm_list, i); 2098 continue; 2099 } 2100 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2101 i = list_next(&xdf_hvm_list, i); 2102 continue; 2103 } 2104 break; 2105 } 2106 return (i); 2107 } 2108 2109 dev_info_t * 2110 xdf_hvm_hold(const char *path) 2111 { 2112 xdf_hvm_entry_t *i; 2113 dev_info_t *dip; 2114 2115 mutex_enter(&xdf_hvm_list_lock); 2116 i = i_xdf_hvm_find(path, NULL); 2117 if (i == NULL) { 2118 mutex_exit(&xdf_hvm_list_lock); 2119 return (B_FALSE); 2120 } 2121 ndi_hold_devi(dip = i->xdf_he_dip); 2122 mutex_exit(&xdf_hvm_list_lock); 2123 return (dip); 2124 } 2125 2126 static void 2127 xdf_hvm_add(dev_info_t *dip) 2128 { 2129 xdf_hvm_entry_t *i; 2130 char *path; 2131 2132 /* figure out the 
path for the dip */ 2133 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2134 (void) ddi_pathname(dip, path); 2135 2136 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2137 i->xdf_he_dip = dip; 2138 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2139 2140 mutex_enter(&xdf_hvm_list_lock); 2141 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2142 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2143 list_insert_head(&xdf_hvm_list, i); 2144 mutex_exit(&xdf_hvm_list_lock); 2145 2146 kmem_free(path, MAXPATHLEN); 2147 } 2148 2149 static void 2150 xdf_hvm_rm(dev_info_t *dip) 2151 { 2152 xdf_hvm_entry_t *i; 2153 2154 mutex_enter(&xdf_hvm_list_lock); 2155 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2156 list_remove(&xdf_hvm_list, i); 2157 mutex_exit(&xdf_hvm_list_lock); 2158 2159 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2160 kmem_free(i, sizeof (*i)); 2161 } 2162 2163 static void 2164 xdf_hvm_init(void) 2165 { 2166 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2167 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2168 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2169 } 2170 2171 static void 2172 xdf_hvm_fini(void) 2173 { 2174 ASSERT(list_head(&xdf_hvm_list) == NULL); 2175 list_destroy(&xdf_hvm_list); 2176 mutex_destroy(&xdf_hvm_list_lock); 2177 } 2178 2179 boolean_t 2180 xdf_hvm_connect(dev_info_t *dip) 2181 { 2182 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2183 char *oename, *str; 2184 int rv; 2185 2186 mutex_enter(&vdp->xdf_cb_lk); 2187 2188 /* 2189 * Before try to establish a connection we need to wait for the 2190 * backend hotplug scripts to have run. Once they are run the 2191 * "<oename>/hotplug-status" property will be set to "connected". 2192 */ 2193 for (;;) { 2194 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2195 2196 /* 2197 * Get the xenbus path to the backend device. Note that 2198 * we can't cache this path (and we look it up on each pass 2199 * through this loop) because it could change during 2200 * suspend, resume, and migration operations. 
2201 */ 2202 if ((oename = xvdi_get_oename(dip)) == NULL) { 2203 mutex_exit(&vdp->xdf_cb_lk); 2204 return (B_FALSE); 2205 } 2206 2207 str = NULL; 2208 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2209 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2210 break; 2211 2212 if (str != NULL) 2213 strfree(str); 2214 2215 /* wait for an update to "<oename>/hotplug-status" */ 2216 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2217 /* we got interrupted by a signal */ 2218 mutex_exit(&vdp->xdf_cb_lk); 2219 return (B_FALSE); 2220 } 2221 } 2222 2223 /* Good news. The backend hotplug scripts have been run. */ 2224 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2225 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2226 strfree(str); 2227 2228 /* 2229 * If we're emulating a cd device and if the backend doesn't support 2230 * media request opreations, then we're not going to bother trying 2231 * to establish a connection for a couple reasons. First off, media 2232 * requests support is required to support operations like eject and 2233 * media locking. Second, other backend platforms like Linux don't 2234 * support hvm pv cdrom access. They don't even have a backend pv 2235 * driver for cdrom device nodes, so we don't want to block forever 2236 * waiting for a connection to a backend driver that doesn't exist. 2237 */ 2238 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2239 mutex_exit(&vdp->xdf_cb_lk); 2240 return (B_FALSE); 2241 } 2242 2243 mutex_enter(&vdp->xdf_dev_lk); 2244 rv = xdf_connect_locked(vdp, B_TRUE); 2245 mutex_exit(&vdp->xdf_dev_lk); 2246 mutex_exit(&vdp->xdf_cb_lk); 2247 2248 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2249 } 2250 2251 int 2252 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2253 { 2254 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2255 2256 /* sanity check the requested physical geometry */ 2257 mutex_enter(&vdp->xdf_dev_lk); 2258 if ((geomp->g_secsize != XB_BSIZE) || 2259 (geomp->g_capacity == 0)) { 2260 mutex_exit(&vdp->xdf_dev_lk); 2261 return (EINVAL); 2262 } 2263 2264 /* 2265 * If we've already connected to the backend device then make sure 2266 * we're not defining a physical geometry larger than our backend 2267 * device. 2268 */ 2269 if ((vdp->xdf_xdev_nblocks != 0) && 2270 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2271 mutex_exit(&vdp->xdf_dev_lk); 2272 return (EINVAL); 2273 } 2274 2275 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2276 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2277 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2278 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2279 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2280 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2281 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2282 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2283 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2284 2285 vdp->xdf_pgeom_fixed = B_TRUE; 2286 mutex_exit(&vdp->xdf_dev_lk); 2287 2288 /* force a re-validation */ 2289 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2290 2291 return (0); 2292 } 2293 2294 boolean_t 2295 xdf_is_cd(dev_info_t *dip) 2296 { 2297 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2298 boolean_t rv; 2299 2300 mutex_enter(&vdp->xdf_cb_lk); 2301 rv = XD_IS_CD(vdp); 2302 mutex_exit(&vdp->xdf_cb_lk); 2303 return (rv); 2304 } 2305 2306 boolean_t 2307 xdf_is_rm(dev_info_t *dip) 2308 { 2309 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2310 boolean_t rv; 2311 2312 mutex_enter(&vdp->xdf_cb_lk); 2313 rv = XD_IS_RM(vdp); 2314 mutex_exit(&vdp->xdf_cb_lk); 2315 return (rv); 2316 } 2317 2318 boolean_t 2319 xdf_media_req_supported(dev_info_t *dip) 2320 { 2321 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 
2322 boolean_t rv; 2323 2324 mutex_enter(&vdp->xdf_cb_lk); 2325 rv = vdp->xdf_media_req_supported; 2326 mutex_exit(&vdp->xdf_cb_lk); 2327 return (rv); 2328 } 2329 2330 #endif /* XPV_HVM_DRIVER */ 2331 2332 static int 2333 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2334 { 2335 xdf_t *vdp; 2336 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2337 2338 if (vdp == NULL) 2339 return (ENXIO); 2340 2341 mutex_enter(&vdp->xdf_dev_lk); 2342 *capp = vdp->xdf_pgeom.g_capacity; 2343 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2344 mutex_exit(&vdp->xdf_dev_lk); 2345 return (0); 2346 } 2347 2348 static int 2349 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2350 { 2351 xdf_t *vdp; 2352 2353 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2354 return (ENXIO); 2355 *geomp = vdp->xdf_pgeom; 2356 return (0); 2357 } 2358 2359 /* 2360 * No real HBA, no geometry available from it 2361 */ 2362 /*ARGSUSED*/ 2363 static int 2364 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2365 { 2366 return (EINVAL); 2367 } 2368 2369 static int 2370 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2371 { 2372 xdf_t *vdp; 2373 2374 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2375 return (ENXIO); 2376 2377 if (XD_IS_RO(vdp)) 2378 tgattributep->media_is_writable = 0; 2379 else 2380 tgattributep->media_is_writable = 1; 2381 tgattributep->media_is_rotational = 0; 2382 return (0); 2383 } 2384 2385 /* ARGSUSED3 */ 2386 int 2387 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 2388 { 2389 int instance; 2390 xdf_t *vdp; 2391 2392 instance = ddi_get_instance(dip); 2393 2394 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 2395 return (ENXIO); 2396 2397 switch (cmd) { 2398 case TG_GETPHYGEOM: 2399 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2400 case TG_GETVIRTGEOM: 2401 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2402 case TG_GETCAPACITY: 2403 return 
(xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2404 case TG_GETBLOCKSIZE: 2405 mutex_enter(&vdp->xdf_cb_lk); 2406 *(uint32_t *)arg = vdp->xdf_xdev_secsize; 2407 mutex_exit(&vdp->xdf_cb_lk); 2408 return (0); 2409 case TG_GETATTR: 2410 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2411 default: 2412 return (ENOTTY); 2413 } 2414 } 2415 2416 /* ARGSUSED5 */ 2417 int 2418 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2419 diskaddr_t start, size_t reqlen, void *tg_cookie) 2420 { 2421 xdf_t *vdp; 2422 struct buf *bp; 2423 int err = 0; 2424 2425 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2426 2427 /* We don't allow IO from the oe_change callback thread */ 2428 ASSERT(curthread != vdp->xdf_oe_change_thread); 2429 2430 /* 2431 * Having secsize of 0 means that device isn't connected yet. 2432 * FIXME This happens for CD devices, and there's nothing we 2433 * can do about it at the moment. 2434 */ 2435 if (vdp->xdf_xdev_secsize == 0) 2436 return (EIO); 2437 2438 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) 2439 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2440 return (EINVAL); 2441 2442 bp = getrbuf(KM_SLEEP); 2443 if (cmd == TG_READ) 2444 bp->b_flags = B_BUSY | B_READ; 2445 else 2446 bp->b_flags = B_BUSY | B_WRITE; 2447 2448 bp->b_un.b_addr = bufp; 2449 bp->b_bcount = reqlen; 2450 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); 2451 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2452 2453 mutex_enter(&vdp->xdf_dev_lk); 2454 xdf_bp_push(vdp, bp); 2455 mutex_exit(&vdp->xdf_dev_lk); 2456 xdf_io_start(vdp); 2457 if (curthread == vdp->xdf_ready_tq_thread) 2458 (void) xdf_ring_drain(vdp); 2459 err = biowait(bp); 2460 ASSERT(bp->b_flags & B_DONE); 2461 freerbuf(bp); 2462 return (err); 2463 } 2464 2465 /* 2466 * Lock the current media. Set the media state to "lock". 2467 * (Media locks are only respected by the backend driver.) 
2468 */ 2469 static int 2470 xdf_ioctl_mlock(xdf_t *vdp) 2471 { 2472 int rv; 2473 mutex_enter(&vdp->xdf_cb_lk); 2474 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2475 mutex_exit(&vdp->xdf_cb_lk); 2476 return (rv); 2477 } 2478 2479 /* 2480 * Release a media lock. Set the media state to "none". 2481 */ 2482 static int 2483 xdf_ioctl_munlock(xdf_t *vdp) 2484 { 2485 int rv; 2486 mutex_enter(&vdp->xdf_cb_lk); 2487 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2488 mutex_exit(&vdp->xdf_cb_lk); 2489 return (rv); 2490 } 2491 2492 /* 2493 * Eject the current media. Ignores any media locks. (Media locks 2494 * are only for benifit of the the backend.) 2495 */ 2496 static int 2497 xdf_ioctl_eject(xdf_t *vdp) 2498 { 2499 int rv; 2500 2501 mutex_enter(&vdp->xdf_cb_lk); 2502 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2503 mutex_exit(&vdp->xdf_cb_lk); 2504 return (rv); 2505 } 2506 2507 /* 2508 * We've set the media requests xenbus parameter to eject, so now 2509 * disconnect from the backend, wait for the backend to clear 2510 * the media requets xenbus paramter, and then we can reconnect 2511 * to the backend. 2512 */ 2513 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2514 mutex_enter(&vdp->xdf_dev_lk); 2515 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2516 mutex_exit(&vdp->xdf_dev_lk); 2517 mutex_exit(&vdp->xdf_cb_lk); 2518 return (EIO); 2519 } 2520 mutex_exit(&vdp->xdf_dev_lk); 2521 mutex_exit(&vdp->xdf_cb_lk); 2522 return (0); 2523 } 2524 2525 /* 2526 * Watch for media state changes. This can be an insertion of a device 2527 * (triggered by a 'xm block-configure' request in another domain) or 2528 * the ejection of a device (triggered by a local "eject" operation). 2529 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 
2530 */ 2531 static int 2532 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2533 { 2534 enum dkio_state prev_state; 2535 2536 mutex_enter(&vdp->xdf_cb_lk); 2537 prev_state = vdp->xdf_mstate; 2538 2539 if (vdp->xdf_mstate == mstate) { 2540 while (vdp->xdf_mstate == prev_state) { 2541 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2542 &vdp->xdf_cb_lk) == 0) { 2543 mutex_exit(&vdp->xdf_cb_lk); 2544 return (EINTR); 2545 } 2546 } 2547 } 2548 2549 if ((prev_state != DKIO_INSERTED) && 2550 (vdp->xdf_mstate == DKIO_INSERTED)) { 2551 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2552 mutex_exit(&vdp->xdf_cb_lk); 2553 return (0); 2554 } 2555 2556 mutex_exit(&vdp->xdf_cb_lk); 2557 return (0); 2558 } 2559 2560 /*ARGSUSED*/ 2561 static int 2562 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2563 int *rvalp) 2564 { 2565 minor_t minor = getminor(dev); 2566 int part = XDF_PART(minor); 2567 xdf_t *vdp; 2568 int rv; 2569 2570 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2571 (!xdf_isopen(vdp, part))) 2572 return (ENXIO); 2573 2574 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2575 vdp->xdf_addr, cmd, cmd)); 2576 2577 switch (cmd) { 2578 default: 2579 return (ENOTTY); 2580 case DKIOCG_PHYGEOM: 2581 case DKIOCG_VIRTGEOM: 2582 case DKIOCGGEOM: 2583 case DKIOCSGEOM: 2584 case DKIOCGAPART: 2585 case DKIOCSAPART: 2586 case DKIOCGVTOC: 2587 case DKIOCSVTOC: 2588 case DKIOCPARTINFO: 2589 case DKIOCGEXTVTOC: 2590 case DKIOCSEXTVTOC: 2591 case DKIOCEXTPARTINFO: 2592 case DKIOCGMBOOT: 2593 case DKIOCSMBOOT: 2594 case DKIOCGETEFI: 2595 case DKIOCSETEFI: 2596 case DKIOCSETEXTPART: 2597 case DKIOCPARTITION: 2598 rv = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2599 rvalp, NULL); 2600 if (rv != 0) 2601 return (rv); 2602 /* 2603 * If we're labelling the disk, we have to update the geometry 2604 * in the cmlb data structures, and we also have to write a new 2605 * devid to the disk. 
Note that writing an EFI label currently 2606 * requires 4 ioctls, and devid setup will fail on all but the 2607 * last. 2608 */ 2609 if (cmd == DKIOCSEXTVTOC || cmd == DKIOCSVTOC || 2610 cmd == DKIOCSETEFI) { 2611 rv = cmlb_validate(vdp->xdf_vd_lbl, 0, 0); 2612 if (rv == 0) { 2613 xdf_devid_setup(vdp); 2614 } else { 2615 cmn_err(CE_WARN, 2616 "xdf@%s, labeling failed on validate", 2617 vdp->xdf_addr); 2618 } 2619 } 2620 return (rv); 2621 case FDEJECT: 2622 case DKIOCEJECT: 2623 case CDROMEJECT: 2624 return (xdf_ioctl_eject(vdp)); 2625 case DKIOCLOCK: 2626 return (xdf_ioctl_mlock(vdp)); 2627 case DKIOCUNLOCK: 2628 return (xdf_ioctl_munlock(vdp)); 2629 case CDROMREADOFFSET: { 2630 int offset = 0; 2631 if (!XD_IS_CD(vdp)) 2632 return (ENOTTY); 2633 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2634 return (EFAULT); 2635 return (0); 2636 } 2637 case DKIOCGMEDIAINFO: { 2638 struct dk_minfo media_info; 2639 2640 media_info.dki_lbsize = vdp->xdf_xdev_secsize; 2641 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2642 if (XD_IS_CD(vdp)) 2643 media_info.dki_media_type = DK_CDROM; 2644 else 2645 media_info.dki_media_type = DK_FIXED_DISK; 2646 2647 if (ddi_copyout(&media_info, (void *)arg, 2648 sizeof (struct dk_minfo), mode)) 2649 return (EFAULT); 2650 return (0); 2651 } 2652 case DKIOCINFO: { 2653 struct dk_cinfo info; 2654 2655 /* controller information */ 2656 if (XD_IS_CD(vdp)) 2657 info.dki_ctype = DKC_CDROM; 2658 else 2659 info.dki_ctype = DKC_VBD; 2660 2661 info.dki_cnum = 0; 2662 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2663 2664 /* unit information */ 2665 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2666 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2667 info.dki_flags = DKI_FMTVOL; 2668 info.dki_partition = part; 2669 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2670 info.dki_addr = 0; 2671 info.dki_space = 0; 2672 info.dki_prio = 0; 2673 info.dki_vec = 0; 2674 2675 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 
2676 return (EFAULT); 2677 return (0); 2678 } 2679 case DKIOCSTATE: { 2680 enum dkio_state mstate; 2681 2682 if (ddi_copyin((void *)arg, &mstate, 2683 sizeof (mstate), mode) != 0) 2684 return (EFAULT); 2685 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2686 return (rv); 2687 mstate = vdp->xdf_mstate; 2688 if (ddi_copyout(&mstate, (void *)arg, 2689 sizeof (mstate), mode) != 0) 2690 return (EFAULT); 2691 return (0); 2692 } 2693 case DKIOCREMOVABLE: { 2694 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2695 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2696 return (EFAULT); 2697 return (0); 2698 } 2699 case DKIOCGETWCE: { 2700 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2701 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode)) 2702 return (EFAULT); 2703 return (0); 2704 } 2705 case DKIOCSETWCE: { 2706 int i; 2707 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2708 return (EFAULT); 2709 vdp->xdf_wce = VOID2BOOLEAN(i); 2710 return (0); 2711 } 2712 case DKIOCFLUSHWRITECACHE: { 2713 struct dk_callback *dkc = (struct dk_callback *)arg; 2714 2715 if (vdp->xdf_flush_supported) { 2716 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2717 NULL, 0, 0, (void *)dev); 2718 } else if (vdp->xdf_feature_barrier && 2719 !xdf_barrier_flush_disable) { 2720 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2721 vdp->xdf_cache_flush_block, xdf_flush_block, 2722 vdp->xdf_xdev_secsize, (void *)dev); 2723 } else { 2724 return (ENOTTY); 2725 } 2726 if ((mode & FKIOCTL) && (dkc != NULL) && 2727 (dkc->dkc_callback != NULL)) { 2728 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2729 /* need to return 0 after calling callback */ 2730 rv = 0; 2731 } 2732 return (rv); 2733 } 2734 } 2735 /*NOTREACHED*/ 2736 } 2737 2738 static int 2739 xdf_strategy(struct buf *bp) 2740 { 2741 xdf_t *vdp; 2742 minor_t minor; 2743 diskaddr_t p_blkct, p_blkst; 2744 daddr_t blkno; 2745 ulong_t nblks; 2746 int part; 2747 2748 minor = getminor(bp->b_edev); 2749 part = XDF_PART(minor); 2750 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2751 2752 
mutex_enter(&vdp->xdf_dev_lk); 2753 if (!xdf_isopen(vdp, part)) { 2754 mutex_exit(&vdp->xdf_dev_lk); 2755 xdf_io_err(bp, ENXIO, 0); 2756 return (0); 2757 } 2758 2759 /* We don't allow IO from the oe_change callback thread */ 2760 ASSERT(curthread != vdp->xdf_oe_change_thread); 2761 2762 /* Check for writes to a read only device */ 2763 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2764 mutex_exit(&vdp->xdf_dev_lk); 2765 xdf_io_err(bp, EROFS, 0); 2766 return (0); 2767 } 2768 2769 /* Check if this I/O is accessing a partition or the entire disk */ 2770 if ((long)bp->b_private == XB_SLICE_NONE) { 2771 /* This I/O is using an absolute offset */ 2772 p_blkct = vdp->xdf_xdev_nblocks; 2773 p_blkst = 0; 2774 } else { 2775 /* This I/O is using a partition relative offset */ 2776 mutex_exit(&vdp->xdf_dev_lk); 2777 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2778 &p_blkst, NULL, NULL, NULL)) { 2779 xdf_io_err(bp, ENXIO, 0); 2780 return (0); 2781 } 2782 mutex_enter(&vdp->xdf_dev_lk); 2783 } 2784 2785 /* 2786 * Adjust the real blkno and bcount according to the underline 2787 * physical sector size. 
2788 */ 2789 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); 2790 2791 /* check for a starting block beyond the disk or partition limit */ 2792 if (blkno > p_blkct) { 2793 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2794 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); 2795 mutex_exit(&vdp->xdf_dev_lk); 2796 xdf_io_err(bp, EINVAL, 0); 2797 return (0); 2798 } 2799 2800 /* Legacy: don't set error flag at this case */ 2801 if (blkno == p_blkct) { 2802 mutex_exit(&vdp->xdf_dev_lk); 2803 bp->b_resid = bp->b_bcount; 2804 biodone(bp); 2805 return (0); 2806 } 2807 2808 /* sanitize the input buf */ 2809 bioerror(bp, 0); 2810 bp->b_resid = 0; 2811 bp->av_back = bp->av_forw = NULL; 2812 2813 /* Adjust for partial transfer, this will result in an error later */ 2814 if (vdp->xdf_xdev_secsize != 0 && 2815 vdp->xdf_xdev_secsize != XB_BSIZE) { 2816 nblks = bp->b_bcount / vdp->xdf_xdev_secsize; 2817 } else { 2818 nblks = bp->b_bcount >> XB_BSHIFT; 2819 } 2820 2821 if ((blkno + nblks) > p_blkct) { 2822 if (vdp->xdf_xdev_secsize != 0 && 2823 vdp->xdf_xdev_secsize != XB_BSIZE) { 2824 bp->b_resid = 2825 ((blkno + nblks) - p_blkct) * 2826 vdp->xdf_xdev_secsize; 2827 } else { 2828 bp->b_resid = 2829 ((blkno + nblks) - p_blkct) << 2830 XB_BSHIFT; 2831 } 2832 bp->b_bcount -= bp->b_resid; 2833 } 2834 2835 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2836 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); 2837 2838 /* Fix up the buf struct */ 2839 bp->b_flags |= B_BUSY; 2840 bp->b_private = (void *)(uintptr_t)p_blkst; 2841 2842 xdf_bp_push(vdp, bp); 2843 mutex_exit(&vdp->xdf_dev_lk); 2844 xdf_io_start(vdp); 2845 if (do_polled_io) 2846 (void) xdf_ring_drain(vdp); 2847 return (0); 2848 } 2849 2850 /*ARGSUSED*/ 2851 static int 2852 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2853 { 2854 xdf_t *vdp; 2855 minor_t minor; 2856 diskaddr_t p_blkcnt; 2857 int part; 2858 2859 minor = getminor(dev); 2860 if ((vdp = 
ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2861 return (ENXIO); 2862 2863 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2864 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2865 2866 part = XDF_PART(minor); 2867 if (!xdf_isopen(vdp, part)) 2868 return (ENXIO); 2869 2870 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2871 NULL, NULL, NULL, NULL)) 2872 return (ENXIO); 2873 2874 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2875 return (ENOSPC); 2876 2877 if (U_INVAL(uiop)) 2878 return (EINVAL); 2879 2880 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2881 } 2882 2883 /*ARGSUSED*/ 2884 static int 2885 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2886 { 2887 xdf_t *vdp; 2888 minor_t minor; 2889 diskaddr_t p_blkcnt; 2890 int part; 2891 2892 minor = getminor(dev); 2893 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2894 return (ENXIO); 2895 2896 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2897 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2898 2899 part = XDF_PART(minor); 2900 if (!xdf_isopen(vdp, part)) 2901 return (ENXIO); 2902 2903 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2904 NULL, NULL, NULL, NULL)) 2905 return (ENXIO); 2906 2907 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2908 return (ENOSPC); 2909 2910 if (U_INVAL(uiop)) 2911 return (EINVAL); 2912 2913 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2914 } 2915 2916 /*ARGSUSED*/ 2917 static int 2918 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2919 { 2920 xdf_t *vdp; 2921 minor_t minor; 2922 struct uio *uiop = aiop->aio_uio; 2923 diskaddr_t p_blkcnt; 2924 int part; 2925 2926 minor = getminor(dev); 2927 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2928 return (ENXIO); 2929 2930 part = XDF_PART(minor); 2931 if (!xdf_isopen(vdp, part)) 2932 return (ENXIO); 2933 2934 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2935 NULL, NULL, NULL, NULL)) 2936 return (ENXIO); 2937 
2938 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2939 return (ENOSPC); 2940 2941 if (U_INVAL(uiop)) 2942 return (EINVAL); 2943 2944 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2945 } 2946 2947 /*ARGSUSED*/ 2948 static int 2949 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2950 { 2951 xdf_t *vdp; 2952 minor_t minor; 2953 struct uio *uiop = aiop->aio_uio; 2954 diskaddr_t p_blkcnt; 2955 int part; 2956 2957 minor = getminor(dev); 2958 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2959 return (ENXIO); 2960 2961 part = XDF_PART(minor); 2962 if (!xdf_isopen(vdp, part)) 2963 return (ENXIO); 2964 2965 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2966 NULL, NULL, NULL, NULL)) 2967 return (ENXIO); 2968 2969 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2970 return (ENOSPC); 2971 2972 if (U_INVAL(uiop)) 2973 return (EINVAL); 2974 2975 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2976 } 2977 2978 static int 2979 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2980 { 2981 struct buf dumpbuf, *dbp = &dumpbuf; 2982 xdf_t *vdp; 2983 minor_t minor; 2984 int err = 0; 2985 int part; 2986 diskaddr_t p_blkcnt, p_blkst; 2987 2988 minor = getminor(dev); 2989 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2990 return (ENXIO); 2991 2992 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 2993 vdp->xdf_addr, (void *)addr, blkno, nblk)); 2994 2995 /* We don't allow IO from the oe_change callback thread */ 2996 ASSERT(curthread != vdp->xdf_oe_change_thread); 2997 2998 part = XDF_PART(minor); 2999 if (!xdf_isopen(vdp, part)) 3000 return (ENXIO); 3001 3002 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 3003 NULL, NULL, NULL)) 3004 return (ENXIO); 3005 3006 if ((blkno + nblk) > 3007 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 3008 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 3009 vdp->xdf_addr, (daddr_t)((blkno + 
nblk) / 3010 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 3011 return (EINVAL); 3012 } 3013 3014 bioinit(dbp); 3015 dbp->b_flags = B_BUSY; 3016 dbp->b_un.b_addr = addr; 3017 dbp->b_bcount = nblk << DEV_BSHIFT; 3018 dbp->b_blkno = blkno; 3019 dbp->b_edev = dev; 3020 dbp->b_private = (void *)(uintptr_t)p_blkst; 3021 3022 mutex_enter(&vdp->xdf_dev_lk); 3023 xdf_bp_push(vdp, dbp); 3024 mutex_exit(&vdp->xdf_dev_lk); 3025 xdf_io_start(vdp); 3026 err = xdf_ring_drain(vdp); 3027 biofini(dbp); 3028 return (err); 3029 } 3030 3031 /*ARGSUSED*/ 3032 static int 3033 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 3034 { 3035 minor_t minor; 3036 xdf_t *vdp; 3037 int part; 3038 ulong_t parbit; 3039 3040 minor = getminor(dev); 3041 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3042 return (ENXIO); 3043 3044 mutex_enter(&vdp->xdf_dev_lk); 3045 part = XDF_PART(minor); 3046 if (!xdf_isopen(vdp, part)) { 3047 mutex_exit(&vdp->xdf_dev_lk); 3048 return (ENXIO); 3049 } 3050 parbit = 1 << part; 3051 3052 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3053 if (otyp == OTYP_LYR) { 3054 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3055 if (--vdp->xdf_vd_lyropen[part] == 0) 3056 vdp->xdf_vd_open[otyp] &= ~parbit; 3057 } else { 3058 vdp->xdf_vd_open[otyp] &= ~parbit; 3059 } 3060 vdp->xdf_vd_exclopen &= ~parbit; 3061 3062 mutex_exit(&vdp->xdf_dev_lk); 3063 return (0); 3064 } 3065 3066 static int 3067 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3068 { 3069 minor_t minor; 3070 xdf_t *vdp; 3071 int part; 3072 ulong_t parbit; 3073 diskaddr_t p_blkct = 0; 3074 boolean_t firstopen; 3075 boolean_t nodelay; 3076 3077 minor = getminor(*devp); 3078 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3079 return (ENXIO); 3080 3081 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3082 3083 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3084 3085 /* do cv_wait until connected or failed */ 3086 mutex_enter(&vdp->xdf_cb_lk); 3087 
mutex_enter(&vdp->xdf_dev_lk); 3088 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3089 mutex_exit(&vdp->xdf_dev_lk); 3090 mutex_exit(&vdp->xdf_cb_lk); 3091 return (ENXIO); 3092 } 3093 mutex_exit(&vdp->xdf_cb_lk); 3094 3095 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3096 mutex_exit(&vdp->xdf_dev_lk); 3097 return (EROFS); 3098 } 3099 3100 part = XDF_PART(minor); 3101 parbit = 1 << part; 3102 if ((vdp->xdf_vd_exclopen & parbit) || 3103 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3104 mutex_exit(&vdp->xdf_dev_lk); 3105 return (EBUSY); 3106 } 3107 3108 /* are we the first one to open this node? */ 3109 firstopen = !xdf_isopen(vdp, -1); 3110 3111 if (otyp == OTYP_LYR) 3112 vdp->xdf_vd_lyropen[part]++; 3113 3114 vdp->xdf_vd_open[otyp] |= parbit; 3115 3116 if (flag & FEXCL) 3117 vdp->xdf_vd_exclopen |= parbit; 3118 3119 mutex_exit(&vdp->xdf_dev_lk); 3120 3121 /* force a re-validation */ 3122 if (firstopen) 3123 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3124 3125 /* If this is a non-blocking open then we're done */ 3126 if (nodelay) 3127 return (0); 3128 3129 /* 3130 * This is a blocking open, so we require: 3131 * - that the disk have a valid label on it 3132 * - that the size of the partition that we're opening is non-zero 3133 */ 3134 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3135 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3136 (void) xdf_close(*devp, flag, otyp, credp); 3137 return (ENXIO); 3138 } 3139 3140 return (0); 3141 } 3142 3143 /*ARGSUSED*/ 3144 static void 3145 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3146 { 3147 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 3148 cv_broadcast(&vdp->xdf_hp_status_cv); 3149 } 3150 3151 static int 3152 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3153 char *name, caddr_t valuep, int *lengthp) 3154 { 3155 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3156 3157 /* 3158 * Sanity check that if a dev_t or dip were specified 
that they 3159 * correspond to this device driver. On debug kernels we'll 3160 * panic and on non-debug kernels we'll return failure. 3161 */ 3162 ASSERT(ddi_driver_major(dip) == xdf_major); 3163 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3164 if ((ddi_driver_major(dip) != xdf_major) || 3165 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3166 return (DDI_PROP_NOT_FOUND); 3167 3168 if (vdp == NULL) 3169 return (ddi_prop_op(dev, dip, prop_op, flags, 3170 name, valuep, lengthp)); 3171 3172 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3173 dev, dip, prop_op, flags, name, valuep, lengthp, 3174 XDF_PART(getminor(dev)), NULL)); 3175 } 3176 3177 /*ARGSUSED*/ 3178 static int 3179 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3180 { 3181 int instance = XDF_INST(getminor((dev_t)arg)); 3182 xdf_t *vbdp; 3183 3184 switch (cmd) { 3185 case DDI_INFO_DEVT2DEVINFO: 3186 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3187 *rp = NULL; 3188 return (DDI_FAILURE); 3189 } 3190 *rp = vbdp->xdf_dip; 3191 return (DDI_SUCCESS); 3192 3193 case DDI_INFO_DEVT2INSTANCE: 3194 *rp = (void *)(uintptr_t)instance; 3195 return (DDI_SUCCESS); 3196 3197 default: 3198 return (DDI_FAILURE); 3199 } 3200 } 3201 3202 /*ARGSUSED*/ 3203 static int 3204 xdf_resume(dev_info_t *dip) 3205 { 3206 xdf_t *vdp; 3207 char *oename; 3208 3209 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3210 goto err; 3211 3212 if (xdf_debug & SUSRES_DBG) 3213 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3214 3215 mutex_enter(&vdp->xdf_cb_lk); 3216 3217 if (xvdi_resume(dip) != DDI_SUCCESS) { 3218 mutex_exit(&vdp->xdf_cb_lk); 3219 goto err; 3220 } 3221 3222 if (((oename = xvdi_get_oename(dip)) == NULL) || 3223 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3224 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3225 mutex_exit(&vdp->xdf_cb_lk); 3226 goto err; 3227 } 3228 3229 mutex_enter(&vdp->xdf_dev_lk); 3230 ASSERT(vdp->xdf_state != 
XD_READY); 3231 xdf_set_state(vdp, XD_UNKNOWN); 3232 mutex_exit(&vdp->xdf_dev_lk); 3233 3234 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3235 mutex_exit(&vdp->xdf_cb_lk); 3236 goto err; 3237 } 3238 3239 mutex_exit(&vdp->xdf_cb_lk); 3240 3241 if (xdf_debug & SUSRES_DBG) 3242 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3243 return (DDI_SUCCESS); 3244 err: 3245 if (xdf_debug & SUSRES_DBG) 3246 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3247 return (DDI_FAILURE); 3248 } 3249 3250 /* 3251 * Uses the in-memory devid if one exists. 3252 * 3253 * Create a devid and write it on the first block of the last track of 3254 * the last cylinder. 3255 * Return DDI_SUCCESS or DDI_FAILURE. 3256 */ 3257 static int 3258 xdf_devid_fabricate(xdf_t *vdp) 3259 { 3260 ddi_devid_t devid = vdp->xdf_tgt_devid; /* null if no devid */ 3261 struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */ 3262 diskaddr_t blk; 3263 uint_t *ip, chksum; 3264 int i, devid_size; 3265 3266 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0) 3267 goto err; 3268 3269 if (devid == NULL && ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, 3270 NULL, &devid) != DDI_SUCCESS) 3271 goto err; 3272 3273 /* allocate a buffer */ 3274 dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP); 3275 3276 /* Fill in the revision */ 3277 dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB; 3278 dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB; 3279 3280 /* Copy in the device id */ 3281 devid_size = ddi_devid_sizeof(devid); 3282 if (devid_size > DK_DEVID_SIZE) 3283 goto err; 3284 bcopy(devid, dkdevidp->dkd_devid, devid_size); 3285 3286 /* Calculate the chksum */ 3287 chksum = 0; 3288 ip = (uint_t *)dkdevidp; 3289 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++) 3290 chksum ^= ip[i]; 3291 3292 /* Fill in the checksum */ 3293 DKD_FORMCHKSUM(chksum, dkdevidp); 3294 3295 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, dkdevidp, blk, 3296 NBPSCTR, NULL) != 0) 3297 goto err; 3298 3299 kmem_free(dkdevidp, NBPSCTR); 
3300 3301 vdp->xdf_tgt_devid = devid; 3302 return (DDI_SUCCESS); 3303 3304 err: 3305 if (dkdevidp != NULL) 3306 kmem_free(dkdevidp, NBPSCTR); 3307 if (devid != NULL && vdp->xdf_tgt_devid == NULL) 3308 ddi_devid_free(devid); 3309 return (DDI_FAILURE); 3310 } 3311 3312 /* 3313 * xdf_devid_read() is a local copy of xdfs_devid_read(), modified to use xdf 3314 * functions. 3315 * 3316 * Read a devid from on the first block of the last track of 3317 * the last cylinder. Make sure what we read is a valid devid. 3318 * Return DDI_SUCCESS or DDI_FAILURE. 3319 */ 3320 static int 3321 xdf_devid_read(xdf_t *vdp) 3322 { 3323 diskaddr_t blk; 3324 struct dk_devid *dkdevidp; 3325 uint_t *ip, chksum; 3326 int i; 3327 3328 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0) 3329 return (DDI_FAILURE); 3330 3331 dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP); 3332 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, dkdevidp, blk, 3333 NBPSCTR, NULL) != 0) 3334 goto err; 3335 3336 /* Validate the revision */ 3337 if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) || 3338 (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB)) 3339 goto err; 3340 3341 /* Calculate the checksum */ 3342 chksum = 0; 3343 ip = (uint_t *)dkdevidp; 3344 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++) 3345 chksum ^= ip[i]; 3346 if (DKD_GETCHKSUM(dkdevidp) != chksum) 3347 goto err; 3348 3349 /* Validate the device id */ 3350 if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS) 3351 goto err; 3352 3353 /* keep a copy of the device id */ 3354 i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid); 3355 vdp->xdf_tgt_devid = kmem_alloc(i, KM_SLEEP); 3356 bcopy(dkdevidp->dkd_devid, vdp->xdf_tgt_devid, i); 3357 kmem_free(dkdevidp, NBPSCTR); 3358 return (DDI_SUCCESS); 3359 3360 err: 3361 kmem_free(dkdevidp, NBPSCTR); 3362 return (DDI_FAILURE); 3363 } 3364 3365 /* 3366 * xdf_devid_setup() is a modified copy of cmdk_devid_setup(). 
3367 * 3368 * This function creates a devid if we don't already have one, and 3369 * registers it. If we already have one, we make sure that it can be 3370 * read from the disk, otherwise we write it to the disk ourselves. If 3371 * we didn't already have a devid, and we create one, we also need to 3372 * register it. 3373 */ 3374 void 3375 xdf_devid_setup(xdf_t *vdp) 3376 { 3377 int rc; 3378 boolean_t existed = vdp->xdf_tgt_devid != NULL; 3379 3380 /* Read devid from the disk, if present */ 3381 rc = xdf_devid_read(vdp); 3382 3383 /* Otherwise write a devid (which we create if necessary) on the disk */ 3384 if (rc != DDI_SUCCESS) 3385 rc = xdf_devid_fabricate(vdp); 3386 3387 /* If we created a devid or found it on the disk, register it */ 3388 if (rc == DDI_SUCCESS && !existed) 3389 (void) ddi_devid_register(vdp->xdf_dip, vdp->xdf_tgt_devid); 3390 } 3391 3392 static int 3393 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3394 { 3395 int n, instance = ddi_get_instance(dip); 3396 ddi_iblock_cookie_t ibc, softibc; 3397 boolean_t dev_iscd = B_FALSE; 3398 xdf_t *vdp; 3399 char *oename, *xsname, *str; 3400 clock_t timeout; 3401 int err = 0; 3402 3403 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3404 "xdf_debug", 0)) != 0) 3405 xdf_debug = n; 3406 3407 switch (cmd) { 3408 case DDI_RESUME: 3409 return (xdf_resume(dip)); 3410 case DDI_ATTACH: 3411 break; 3412 default: 3413 return (DDI_FAILURE); 3414 } 3415 /* DDI_ATTACH */ 3416 3417 if ((xsname = xvdi_get_xsname(dip)) == NULL || 3418 (oename = xvdi_get_oename(dip)) == NULL) 3419 return (DDI_FAILURE); 3420 3421 /* 3422 * Disable auto-detach. This is necessary so that we don't get 3423 * detached while we're disconnected from the back end. 
3424 */ 3425 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3426 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3427 return (DDI_FAILURE); 3428 3429 /* driver handles kernel-issued IOCTLs */ 3430 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3431 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3432 return (DDI_FAILURE); 3433 3434 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3435 return (DDI_FAILURE); 3436 3437 if (ddi_get_soft_iblock_cookie(dip, 3438 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3439 return (DDI_FAILURE); 3440 3441 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3442 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3443 ddi_get_name_addr(dip)); 3444 return (DDI_FAILURE); 3445 } 3446 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3447 dev_iscd = B_TRUE; 3448 strfree(str); 3449 3450 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3451 return (DDI_FAILURE); 3452 3453 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3454 vdp = ddi_get_soft_state(xdf_ssp, instance); 3455 ddi_set_driver_private(dip, vdp); 3456 vdp->xdf_dip = dip; 3457 vdp->xdf_addr = ddi_get_name_addr(dip); 3458 vdp->xdf_suspending = B_FALSE; 3459 vdp->xdf_media_req_supported = B_FALSE; 3460 vdp->xdf_peer = INVALID_DOMID; 3461 vdp->xdf_evtchn = INVALID_EVTCHN; 3462 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3463 offsetof(v_req_t, v_link)); 3464 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3465 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3466 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3467 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3468 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3469 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3470 vdp->xdf_cmlb_reattach = B_TRUE; 3471 if (dev_iscd) { 3472 vdp->xdf_dinfo |= VDISK_CDROM; 3473 vdp->xdf_mstate = DKIO_EJECTED; 3474 } else { 3475 vdp->xdf_mstate = DKIO_NONE; 3476 } 3477 3478 if 
((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3479 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3480 goto errout0; 3481 3482 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3483 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3484 goto errout0; 3485 3486 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3487 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3488 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3489 ddi_get_name_addr(dip)); 3490 goto errout0; 3491 } 3492 3493 /* 3494 * Initialize the physical geometry stucture. Note that currently 3495 * we don't know the size of the backend device so the number 3496 * of blocks on the device will be initialized to zero. Once 3497 * we connect to the backend device we'll update the physical 3498 * geometry to reflect the real size of the device. 3499 */ 3500 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3501 vdp->xdf_pgeom_fixed = B_FALSE; 3502 3503 /* 3504 * Allocate the cmlb handle, minor nodes will be created once 3505 * the device is connected with backend. 3506 */ 3507 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3508 3509 /* We ship with cache-enabled disks */ 3510 vdp->xdf_wce = B_TRUE; 3511 3512 mutex_enter(&vdp->xdf_cb_lk); 3513 /* Watch backend XenbusState change */ 3514 if (xvdi_add_event_handler(dip, 3515 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3516 mutex_exit(&vdp->xdf_cb_lk); 3517 goto errout0; 3518 } 3519 3520 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3521 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3522 ddi_get_name_addr(dip)); 3523 mutex_exit(&vdp->xdf_cb_lk); 3524 goto errout1; 3525 } 3526 3527 /* Nothing else to do for CD devices */ 3528 if (dev_iscd) { 3529 mutex_exit(&vdp->xdf_cb_lk); 3530 goto done; 3531 } 3532 3533 /* 3534 * In order to do cmlb_validate, we have to wait for the disk to 3535 * acknowledge the attach, so we can query the backend for the disk 3536 * geometry (see xdf_setstate_connected). 
3537 * 3538 * We only wait 30 seconds; if this is the root disk, the boot 3539 * will fail, but it would fail anyway if the device never 3540 * connected. If this is a non-boot disk, that disk will fail 3541 * to connect, but again, it would fail anyway. 3542 */ 3543 timeout = ddi_get_lbolt() + drv_usectohz(XDF_STATE_TIMEOUT); 3544 while (vdp->xdf_state != XD_CONNECTED && vdp->xdf_state != XD_READY) { 3545 if (cv_timedwait(&vdp->xdf_dev_cv, &vdp->xdf_cb_lk, 3546 timeout) < 0) { 3547 cmn_err(CE_WARN, "xdf@%s: disk failed to connect", 3548 ddi_get_name_addr(dip)); 3549 mutex_exit(&vdp->xdf_cb_lk); 3550 goto errout1; 3551 } 3552 } 3553 mutex_exit(&vdp->xdf_cb_lk); 3554 3555 /* 3556 * We call cmlb_validate so that the geometry information in 3557 * vdp->xdf_vd_lbl is correct; this fills out the number of 3558 * alternate cylinders so that we have a place to write the 3559 * devid. 3560 */ 3561 if ((err = cmlb_validate(vdp->xdf_vd_lbl, 0, NULL)) != 0) { 3562 cmn_err(CE_NOTE, 3563 "xdf@%s: cmlb_validate failed: %d", 3564 ddi_get_name_addr(dip), err); 3565 /* 3566 * We can carry on even if cmlb_validate() returns EINVAL here, 3567 * as we'll rewrite the disk label anyway. 3568 */ 3569 if (err != EINVAL) 3570 goto errout1; 3571 } 3572 3573 /* 3574 * xdf_devid_setup will only write a devid if one isn't 3575 * already present. If it fails to find or create one, we 3576 * create one in-memory so that when we label the disk later, 3577 * it will have a devid to use. This is helpful to deal with 3578 * cases where people use the devids of their disks before 3579 * labelling them; note that this does cause problems if 3580 * people rely on the devids of unlabelled disks to persist 3581 * across reboot. 
3582 */ 3583 xdf_devid_setup(vdp); 3584 if (vdp->xdf_tgt_devid == NULL) { 3585 if (ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, NULL, 3586 &vdp->xdf_tgt_devid) != DDI_SUCCESS) { 3587 cmn_err(CE_WARN, 3588 "xdf@%s_ attach failed, devid_init failed", 3589 ddi_get_name_addr(dip)); 3590 goto errout1; 3591 } else { 3592 (void) ddi_devid_register(vdp->xdf_dip, 3593 vdp->xdf_tgt_devid); 3594 } 3595 } 3596 3597 done: 3598 #ifdef XPV_HVM_DRIVER 3599 xdf_hvm_add(dip); 3600 3601 /* Report our version to dom0. */ 3602 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3603 HVMPV_XDF_VERS)) 3604 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3605 3606 #endif /* XPV_HVM_DRIVER */ 3607 3608 /* Create kstat for iostat(1M) */ 3609 if (xdf_kstat_create(dip) != 0) { 3610 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3611 ddi_get_name_addr(dip)); 3612 goto errout1; 3613 } 3614 3615 /* 3616 * Don't bother with getting real device identification 3617 * strings (is it even possible?), they are unlikely to 3618 * change often (if at all). 3619 */ 3620 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_VENDOR_ID, 3621 "Xen"); 3622 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_PRODUCT_ID, 3623 dev_iscd ? 
"Virtual CD" : "Virtual disk"); 3624 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_REVISION_ID, 3625 "1.0"); 3626 3627 ddi_report_dev(dip); 3628 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3629 return (DDI_SUCCESS); 3630 3631 errout1: 3632 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3633 xvdi_remove_event_handler(dip, XS_OE_STATE); 3634 errout0: 3635 if (vdp->xdf_vd_lbl != NULL) { 3636 cmlb_free_handle(&vdp->xdf_vd_lbl); 3637 vdp->xdf_vd_lbl = NULL; 3638 } 3639 if (vdp->xdf_softintr_id != NULL) 3640 ddi_remove_softintr(vdp->xdf_softintr_id); 3641 xvdi_remove_xb_watch_handlers(dip); 3642 if (vdp->xdf_ready_tq != NULL) 3643 ddi_taskq_destroy(vdp->xdf_ready_tq); 3644 mutex_destroy(&vdp->xdf_cb_lk); 3645 mutex_destroy(&vdp->xdf_dev_lk); 3646 cv_destroy(&vdp->xdf_dev_cv); 3647 cv_destroy(&vdp->xdf_hp_status_cv); 3648 ddi_soft_state_free(xdf_ssp, instance); 3649 ddi_set_driver_private(dip, NULL); 3650 ddi_prop_remove_all(dip); 3651 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3652 return (DDI_FAILURE); 3653 } 3654 3655 static int 3656 xdf_suspend(dev_info_t *dip) 3657 { 3658 int instance = ddi_get_instance(dip); 3659 xdf_t *vdp; 3660 3661 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3662 return (DDI_FAILURE); 3663 3664 if (xdf_debug & SUSRES_DBG) 3665 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3666 3667 xvdi_suspend(dip); 3668 3669 mutex_enter(&vdp->xdf_cb_lk); 3670 mutex_enter(&vdp->xdf_dev_lk); 3671 3672 vdp->xdf_suspending = B_TRUE; 3673 xdf_ring_destroy(vdp); 3674 xdf_set_state(vdp, XD_SUSPEND); 3675 vdp->xdf_suspending = B_FALSE; 3676 3677 mutex_exit(&vdp->xdf_dev_lk); 3678 mutex_exit(&vdp->xdf_cb_lk); 3679 3680 if (xdf_debug & SUSRES_DBG) 3681 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3682 3683 return (DDI_SUCCESS); 3684 } 3685 3686 static int 3687 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3688 { 3689 xdf_t *vdp; 3690 int instance; 3691 3692 
switch (cmd) { 3693 3694 case DDI_PM_SUSPEND: 3695 break; 3696 3697 case DDI_SUSPEND: 3698 return (xdf_suspend(dip)); 3699 3700 case DDI_DETACH: 3701 break; 3702 3703 default: 3704 return (DDI_FAILURE); 3705 } 3706 3707 instance = ddi_get_instance(dip); 3708 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3709 vdp = ddi_get_soft_state(xdf_ssp, instance); 3710 3711 if (vdp == NULL) 3712 return (DDI_FAILURE); 3713 3714 mutex_enter(&vdp->xdf_cb_lk); 3715 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3716 if (vdp->xdf_state != XD_CLOSED) { 3717 mutex_exit(&vdp->xdf_cb_lk); 3718 return (DDI_FAILURE); 3719 } 3720 mutex_exit(&vdp->xdf_cb_lk); 3721 3722 ASSERT(!ISDMACBON(vdp)); 3723 3724 #ifdef XPV_HVM_DRIVER 3725 xdf_hvm_rm(dip); 3726 #endif /* XPV_HVM_DRIVER */ 3727 3728 if (vdp->xdf_timeout_id != 0) 3729 (void) untimeout(vdp->xdf_timeout_id); 3730 3731 xvdi_remove_event_handler(dip, XS_OE_STATE); 3732 ddi_taskq_destroy(vdp->xdf_ready_tq); 3733 3734 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3735 cmlb_free_handle(&vdp->xdf_vd_lbl); 3736 3737 /* we'll support backend running in domU later */ 3738 #ifdef DOMU_BACKEND 3739 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3740 #endif 3741 3742 list_destroy(&vdp->xdf_vreq_act); 3743 ddi_prop_remove_all(dip); 3744 xdf_kstat_delete(dip); 3745 ddi_remove_softintr(vdp->xdf_softintr_id); 3746 xvdi_remove_xb_watch_handlers(dip); 3747 ddi_set_driver_private(dip, NULL); 3748 cv_destroy(&vdp->xdf_dev_cv); 3749 mutex_destroy(&vdp->xdf_cb_lk); 3750 mutex_destroy(&vdp->xdf_dev_lk); 3751 if (vdp->xdf_cache_flush_block != NULL) 3752 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize); 3753 ddi_soft_state_free(xdf_ssp, instance); 3754 return (DDI_SUCCESS); 3755 } 3756 3757 /* 3758 * Driver linkage structures. 
3759 */ 3760 static struct cb_ops xdf_cbops = { 3761 xdf_open, 3762 xdf_close, 3763 xdf_strategy, 3764 nodev, 3765 xdf_dump, 3766 xdf_read, 3767 xdf_write, 3768 xdf_ioctl, 3769 nodev, 3770 nodev, 3771 nodev, 3772 nochpoll, 3773 xdf_prop_op, 3774 NULL, 3775 D_MP | D_NEW | D_64BIT, 3776 CB_REV, 3777 xdf_aread, 3778 xdf_awrite 3779 }; 3780 3781 struct dev_ops xdf_devops = { 3782 DEVO_REV, /* devo_rev */ 3783 0, /* devo_refcnt */ 3784 xdf_getinfo, /* devo_getinfo */ 3785 nulldev, /* devo_identify */ 3786 nulldev, /* devo_probe */ 3787 xdf_attach, /* devo_attach */ 3788 xdf_detach, /* devo_detach */ 3789 nodev, /* devo_reset */ 3790 &xdf_cbops, /* devo_cb_ops */ 3791 NULL, /* devo_bus_ops */ 3792 NULL, /* devo_power */ 3793 ddi_quiesce_not_supported, /* devo_quiesce */ 3794 }; 3795 3796 /* 3797 * Module linkage structures. 3798 */ 3799 static struct modldrv modldrv = { 3800 &mod_driverops, /* Type of module. This one is a driver */ 3801 "virtual block driver", /* short description */ 3802 &xdf_devops /* driver specific ops */ 3803 }; 3804 3805 static struct modlinkage xdf_modlinkage = { 3806 MODREV_1, (void *)&modldrv, NULL 3807 }; 3808 3809 /* 3810 * standard module entry points 3811 */ 3812 int 3813 _init(void) 3814 { 3815 int rc; 3816 3817 xdf_major = ddi_name_to_major("xdf"); 3818 if (xdf_major == (major_t)-1) 3819 return (EINVAL); 3820 3821 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3822 return (rc); 3823 3824 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3825 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3826 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3827 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3828 3829 #ifdef XPV_HVM_DRIVER 3830 xdf_hvm_init(); 3831 #endif /* XPV_HVM_DRIVER */ 3832 3833 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3834 #ifdef XPV_HVM_DRIVER 3835 xdf_hvm_fini(); 3836 #endif /* XPV_HVM_DRIVER */ 3837 kmem_cache_destroy(xdf_vreq_cache); 3838 kmem_cache_destroy(xdf_gs_cache); 3839 
ddi_soft_state_fini(&xdf_ssp); 3840 return (rc); 3841 } 3842 3843 return (rc); 3844 } 3845 3846 int 3847 _fini(void) 3848 { 3849 int err; 3850 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3851 return (err); 3852 3853 #ifdef XPV_HVM_DRIVER 3854 xdf_hvm_fini(); 3855 #endif /* XPV_HVM_DRIVER */ 3856 3857 kmem_cache_destroy(xdf_vreq_cache); 3858 kmem_cache_destroy(xdf_gs_cache); 3859 ddi_soft_state_fini(&xdf_ssp); 3860 3861 return (0); 3862 } 3863 3864 int 3865 _info(struct modinfo *modinfop) 3866 { 3867 return (mod_info(&xdf_modlinkage, modinfop)); 3868 } 3869