/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports disk device nodes, accepts IO requests from those
 * nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notifications of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
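 *
 * A brief summary of the state machine used below:  xdf_setstate_init()
 * takes us from XD_UNKNOWN or XD_CLOSED to XD_INIT, xdf_setstate_connected()
 * takes us from XD_INIT to XD_CONNECTED, and xdf_setstate_ready() (run from
 * the xdf_ready_tq taskq) takes us from XD_CONNECTED to XD_READY.
 * xdf_disconnect() takes us back to XD_UNKNOWN or XD_CLOSED.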
73 */ 74 75 #include <io/xdf.h> 76 77 #include <sys/conf.h> 78 #include <sys/dkio.h> 79 #include <sys/promif.h> 80 #include <sys/sysmacros.h> 81 #include <sys/kstat.h> 82 #include <sys/mach_mmu.h> 83 #ifdef XPV_HVM_DRIVER 84 #include <sys/xpv_support.h> 85 #else /* !XPV_HVM_DRIVER */ 86 #include <sys/evtchn_impl.h> 87 #endif /* !XPV_HVM_DRIVER */ 88 #include <sys/sunndi.h> 89 #include <public/io/xenbus.h> 90 #include <xen/sys/xenbus_impl.h> 91 #include <sys/scsi/generic/inquiry.h> 92 #include <xen/io/blkif_impl.h> 93 #include <sys/fdio.h> 94 #include <sys/cdio.h> 95 96 /* 97 * DEBUG_EVAL can be used to include debug only statements without 98 * having to use '#ifdef DEBUG' statements 99 */ 100 #ifdef DEBUG 101 #define DEBUG_EVAL(x) (x) 102 #else /* !DEBUG */ 103 #define DEBUG_EVAL(x) 104 #endif /* !DEBUG */ 105 106 #define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ 107 #define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ 108 #define XDF_STATE_TIMEOUT (30*1000*1000) /* 30.00 sec */ 109 110 #define INVALID_DOMID ((domid_t)-1) 111 #define FLUSH_DISKCACHE 0x1 112 #define WRITE_BARRIER 0x2 113 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ 114 #define USE_WRITE_BARRIER(vdp) \ 115 ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) 116 #define USE_FLUSH_DISKCACHE(vdp) \ 117 ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) 118 #define IS_WRITE_BARRIER(vdp, bp) \ 119 (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ 120 ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) 121 #define IS_FLUSH_DISKCACHE(bp) \ 122 (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) 123 124 #define VREQ_DONE(vreq) \ 125 VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ 126 (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ 127 (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) 128 129 #define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) 130 #define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) 131 132 extern int do_polled_io; 133 134 /* run-time tunables that we don't want the compiler to optimize away */ 135 volatile int xdf_debug = 0; 136 volatile boolean_t xdf_barrier_flush_disable = B_FALSE; 137 138 /* per module globals */ 139 major_t xdf_major; 140 static void *xdf_ssp; 141 static kmem_cache_t *xdf_vreq_cache; 142 static kmem_cache_t *xdf_gs_cache; 143 static int xdf_maxphys = XB_MAXPHYS; 144 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; 145 static int xdf_fbrewrites; /* flush block re-write count */ 146 147 /* misc public functions */ 148 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); 149 int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 150 151 /* misc private functions */ 152 static void xdf_io_start(xdf_t *); 153 static void xdf_devid_setup(xdf_t *); 154 155 /* callbacks from commmon label */ 156 static cmlb_tg_ops_t xdf_lb_ops = { 157 TG_DK_OPS_VERSION_1, 158 xdf_lb_rdwr, 159 xdf_lb_getinfo 160 }; 161 162 /* 163 * I/O buffer DMA attributes 164 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most 165 */ 166 static ddi_dma_attr_t xb_dma_attr = { 167 DMA_ATTR_V0, 168 (uint64_t)0, /* lowest address */ 169 (uint64_t)0xffffffffffffffff, /* highest usable address */ 170 (uint64_t)0xffffff, /* DMA counter limit max */ 171 (uint64_t)XB_BSIZE, /* alignment in bytes */ 172 XB_BSIZE - 1, /* bitmap of burst sizes */ 173 XB_BSIZE, /* min transfer */ 174 (uint64_t)XB_MAX_XFER, /* maximum transfer */ 175 (uint64_t)PAGEOFFSET, /* 1 page segment length */ 176 
BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */ 177 XB_BSIZE, /* granularity */ 178 0, /* flags (reserved) */ 179 }; 180 181 static ddi_device_acc_attr_t xc_acc_attr = { 182 DDI_DEVICE_ATTR_V0, 183 DDI_NEVERSWAP_ACC, 184 DDI_STRICTORDER_ACC 185 }; 186 187 static void 188 xdf_timeout_handler(void *arg) 189 { 190 xdf_t *vdp = arg; 191 192 mutex_enter(&vdp->xdf_dev_lk); 193 vdp->xdf_timeout_id = 0; 194 mutex_exit(&vdp->xdf_dev_lk); 195 196 /* new timeout thread could be re-scheduled */ 197 xdf_io_start(vdp); 198 } 199 200 /* 201 * callback func when DMA/GTE resources is available 202 * 203 * Note: we only register one callback function to grant table subsystem 204 * since we only have one 'struct gnttab_free_callback' in xdf_t. 205 */ 206 static int 207 xdf_dmacallback(caddr_t arg) 208 { 209 xdf_t *vdp = (xdf_t *)arg; 210 ASSERT(vdp != NULL); 211 212 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", 213 vdp->xdf_addr)); 214 215 ddi_trigger_softintr(vdp->xdf_softintr_id); 216 return (DDI_DMA_CALLBACK_DONE); 217 } 218 219 static ge_slot_t * 220 gs_get(xdf_t *vdp, int isread) 221 { 222 grant_ref_t gh; 223 ge_slot_t *gs; 224 225 /* try to alloc GTEs needed in this slot, first */ 226 if (gnttab_alloc_grant_references( 227 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { 228 if (vdp->xdf_gnt_callback.next == NULL) { 229 SETDMACBON(vdp); 230 gnttab_request_free_callback( 231 &vdp->xdf_gnt_callback, 232 (void (*)(void *))xdf_dmacallback, 233 (void *)vdp, 234 BLKIF_MAX_SEGMENTS_PER_REQUEST); 235 } 236 return (NULL); 237 } 238 239 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); 240 if (gs == NULL) { 241 gnttab_free_grant_references(gh); 242 if (vdp->xdf_timeout_id == 0) 243 /* restart I/O after one second */ 244 vdp->xdf_timeout_id = 245 timeout(xdf_timeout_handler, vdp, hz); 246 return (NULL); 247 } 248 249 /* init gs_slot */ 250 gs->gs_oeid = vdp->xdf_peer; 251 gs->gs_isread = isread; 252 gs->gs_ghead = gh; 253 gs->gs_ngrefs = 0; 254 255 return (gs); 256 } 257 258 static void 259 gs_free(ge_slot_t *gs) 260 { 261 int i; 262 263 /* release all grant table entry resources used in this slot */ 264 for (i = 0; i < gs->gs_ngrefs; i++) 265 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); 266 gnttab_free_grant_references(gs->gs_ghead); 267 list_remove(&gs->gs_vreq->v_gs, gs); 268 kmem_cache_free(xdf_gs_cache, gs); 269 } 270 271 static grant_ref_t 272 gs_grant(ge_slot_t *gs, mfn_t mfn) 273 { 274 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); 275 276 ASSERT(gr != -1); 277 ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); 278 gs->gs_ge[gs->gs_ngrefs++] = gr; 279 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); 280 281 return (gr); 282 } 283 284 /* 285 * Alloc a vreq for this bp 286 * bp->av_back contains the pointer to the vreq upon return 287 */ 288 static v_req_t * 289 vreq_get(xdf_t *vdp, buf_t *bp) 290 { 291 v_req_t *vreq = NULL; 292 293 ASSERT(BP_VREQ(bp) == NULL); 294 295 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); 296 if (vreq == NULL) { 297 if (vdp->xdf_timeout_id == 0) 298 /* restart I/O after one second */ 299 vdp->xdf_timeout_id = 300 timeout(xdf_timeout_handler, vdp, hz); 301 return (NULL); 302 } 303 bzero(vreq, sizeof (v_req_t)); 304 list_create(&vreq->v_gs, sizeof (ge_slot_t), 305 offsetof(ge_slot_t, gs_vreq_link)); 306 vreq->v_buf = bp; 307 vreq->v_status = VREQ_INIT; 308 vreq->v_runq = B_FALSE; 309 BP_VREQ_SET(bp, vreq); 310 /* init of other fields in vreq is up to the caller */ 311 312 
list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); 313 314 return (vreq); 315 } 316 317 static void 318 vreq_free(xdf_t *vdp, v_req_t *vreq) 319 { 320 buf_t *bp = vreq->v_buf; 321 322 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 323 ASSERT(BP_VREQ(bp) == vreq); 324 325 list_remove(&vdp->xdf_vreq_act, vreq); 326 327 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) 328 goto done; 329 330 switch (vreq->v_status) { 331 case VREQ_DMAWIN_DONE: 332 case VREQ_GS_ALLOCED: 333 case VREQ_DMABUF_BOUND: 334 (void) ddi_dma_unbind_handle(vreq->v_dmahdl); 335 /*FALLTHRU*/ 336 case VREQ_DMAMEM_ALLOCED: 337 if (!ALIGNED_XFER(bp)) { 338 ASSERT(vreq->v_abuf != NULL); 339 if (!IS_ERROR(bp) && IS_READ(bp)) 340 bcopy(vreq->v_abuf, bp->b_un.b_addr, 341 bp->b_bcount); 342 ddi_dma_mem_free(&vreq->v_align); 343 } 344 /*FALLTHRU*/ 345 case VREQ_MEMDMAHDL_ALLOCED: 346 if (!ALIGNED_XFER(bp)) 347 ddi_dma_free_handle(&vreq->v_memdmahdl); 348 /*FALLTHRU*/ 349 case VREQ_DMAHDL_ALLOCED: 350 ddi_dma_free_handle(&vreq->v_dmahdl); 351 break; 352 default: 353 break; 354 } 355 done: 356 ASSERT(!vreq->v_runq); 357 list_destroy(&vreq->v_gs); 358 kmem_cache_free(xdf_vreq_cache, vreq); 359 } 360 361 /* 362 * Snarf new data if our flush block was re-written 363 */ 364 static void 365 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) 366 { 367 int nblks; 368 boolean_t mapin; 369 370 if (IS_WRITE_BARRIER(vdp, bp)) 371 return; /* write was a flush write */ 372 373 mapin = B_FALSE; 374 nblks = bp->b_bcount >> DEV_BSHIFT; 375 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { 376 xdf_fbrewrites++; 377 if (bp->b_flags & (B_PAGEIO | B_PHYS)) { 378 mapin = B_TRUE; 379 bp_mapin(bp); 380 } 381 bcopy(bp->b_un.b_addr + 382 ((xdf_flush_block - blkno) << DEV_BSHIFT), 383 vdp->xdf_cache_flush_block, DEV_BSIZE); 384 if (mapin) 385 bp_mapout(bp); 386 } 387 } 388 389 /* 390 * Initalize the DMA and grant table resources for the buf 391 */ 392 static int 393 vreq_setup(xdf_t *vdp, v_req_t *vreq) 394 { 395 int rc; 396 ddi_dma_attr_t dmaattr; 397 uint_t ndcs, ndws; 398 ddi_dma_handle_t dh; 399 ddi_dma_handle_t mdh; 400 ddi_dma_cookie_t dc; 401 ddi_acc_handle_t abh; 402 caddr_t aba; 403 ge_slot_t *gs; 404 size_t bufsz; 405 off_t off; 406 size_t sz; 407 buf_t *bp = vreq->v_buf; 408 int dma_flags = (IS_READ(bp) ? 
DDI_DMA_READ : DDI_DMA_WRITE) | 409 DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 410 411 switch (vreq->v_status) { 412 case VREQ_INIT: 413 if (IS_FLUSH_DISKCACHE(bp)) { 414 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 415 DPRINTF(DMA_DBG, ("xdf@%s: " 416 "get ge_slotfailed\n", vdp->xdf_addr)); 417 return (DDI_FAILURE); 418 } 419 vreq->v_blkno = 0; 420 vreq->v_nslots = 1; 421 vreq->v_flush_diskcache = FLUSH_DISKCACHE; 422 vreq->v_status = VREQ_GS_ALLOCED; 423 gs->gs_vreq = vreq; 424 list_insert_head(&vreq->v_gs, gs); 425 return (DDI_SUCCESS); 426 } 427 428 if (IS_WRITE_BARRIER(vdp, bp)) 429 vreq->v_flush_diskcache = WRITE_BARRIER; 430 vreq->v_blkno = bp->b_blkno + 431 (diskaddr_t)(uintptr_t)bp->b_private; 432 /* See if we wrote new data to our flush block */ 433 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) 434 check_fbwrite(vdp, bp, vreq->v_blkno); 435 vreq->v_status = VREQ_INIT_DONE; 436 /*FALLTHRU*/ 437 438 case VREQ_INIT_DONE: 439 /* 440 * alloc DMA handle 441 */ 442 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, 443 xdf_dmacallback, (caddr_t)vdp, &dh); 444 if (rc != DDI_SUCCESS) { 445 SETDMACBON(vdp); 446 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", 447 vdp->xdf_addr)); 448 return (DDI_FAILURE); 449 } 450 451 vreq->v_dmahdl = dh; 452 vreq->v_status = VREQ_DMAHDL_ALLOCED; 453 /*FALLTHRU*/ 454 455 case VREQ_DMAHDL_ALLOCED: 456 /* 457 * alloc dma handle for 512-byte aligned buf 458 */ 459 if (!ALIGNED_XFER(bp)) { 460 /* 461 * XXPV: we need to temporarily enlarge the seg 462 * boundary and s/g length to work round CR6381968 463 */ 464 dmaattr = xb_dma_attr; 465 dmaattr.dma_attr_seg = (uint64_t)-1; 466 dmaattr.dma_attr_sgllen = INT_MAX; 467 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, 468 xdf_dmacallback, (caddr_t)vdp, &mdh); 469 if (rc != DDI_SUCCESS) { 470 SETDMACBON(vdp); 471 DPRINTF(DMA_DBG, ("xdf@%s: " 472 "unaligned buf DMAhandle alloc failed\n", 473 vdp->xdf_addr)); 474 return (DDI_FAILURE); 475 } 476 vreq->v_memdmahdl = mdh; 477 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED; 478 } 479 /*FALLTHRU*/ 480 481 case VREQ_MEMDMAHDL_ALLOCED: 482 /* 483 * alloc 512-byte aligned buf 484 */ 485 if (!ALIGNED_XFER(bp)) { 486 if (bp->b_flags & (B_PAGEIO | B_PHYS)) 487 bp_mapin(bp); 488 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, 489 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, 490 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, 491 &aba, &bufsz, &abh); 492 if (rc != DDI_SUCCESS) { 493 SETDMACBON(vdp); 494 DPRINTF(DMA_DBG, ("xdf@%s: " 495 "DMA mem allocation failed\n", 496 vdp->xdf_addr)); 497 return (DDI_FAILURE); 498 } 499 500 vreq->v_abuf = aba; 501 vreq->v_align = abh; 502 vreq->v_status = VREQ_DMAMEM_ALLOCED; 503 504 ASSERT(bufsz >= bp->b_bcount); 505 if (!IS_READ(bp)) 506 bcopy(bp->b_un.b_addr, vreq->v_abuf, 507 bp->b_bcount); 508 } 509 /*FALLTHRU*/ 510 511 case VREQ_DMAMEM_ALLOCED: 512 /* 513 * dma bind 514 */ 515 if (ALIGNED_XFER(bp)) { 516 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, 517 dma_flags, xdf_dmacallback, (caddr_t)vdp, 518 &dc, &ndcs); 519 } else { 520 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, 521 NULL, vreq->v_abuf, bp->b_bcount, dma_flags, 522 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); 523 } 524 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { 525 /* get num of dma windows */ 526 if (rc == DDI_DMA_PARTIAL_MAP) { 527 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); 528 ASSERT(rc == DDI_SUCCESS); 529 } else { 530 ndws = 1; 531 } 532 } else { 533 SETDMACBON(vdp); 534 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", 535 vdp->xdf_addr)); 536 return 
(DDI_FAILURE); 537 } 538 539 vreq->v_dmac = dc; 540 vreq->v_dmaw = 0; 541 vreq->v_ndmacs = ndcs; 542 vreq->v_ndmaws = ndws; 543 vreq->v_nslots = ndws; 544 vreq->v_status = VREQ_DMABUF_BOUND; 545 /*FALLTHRU*/ 546 547 case VREQ_DMABUF_BOUND: 548 /* 549 * get ge_slot, callback is set upon failure from gs_get(), 550 * if not set previously 551 */ 552 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 553 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 554 vdp->xdf_addr)); 555 return (DDI_FAILURE); 556 } 557 558 vreq->v_status = VREQ_GS_ALLOCED; 559 gs->gs_vreq = vreq; 560 list_insert_head(&vreq->v_gs, gs); 561 break; 562 563 case VREQ_GS_ALLOCED: 564 /* nothing need to be done */ 565 break; 566 567 case VREQ_DMAWIN_DONE: 568 /* 569 * move to the next dma window 570 */ 571 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); 572 573 /* get a ge_slot for this DMA window */ 574 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { 575 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", 576 vdp->xdf_addr)); 577 return (DDI_FAILURE); 578 } 579 580 vreq->v_dmaw++; 581 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, 582 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); 583 vreq->v_status = VREQ_GS_ALLOCED; 584 gs->gs_vreq = vreq; 585 list_insert_head(&vreq->v_gs, gs); 586 break; 587 588 default: 589 return (DDI_FAILURE); 590 } 591 592 return (DDI_SUCCESS); 593 } 594 595 static int 596 xdf_cmlb_attach(xdf_t *vdp) 597 { 598 dev_info_t *dip = vdp->xdf_dip; 599 600 return (cmlb_attach(dip, &xdf_lb_ops, 601 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, 602 XD_IS_RM(vdp), 603 B_TRUE, 604 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, 605 #ifdef XPV_HVM_DRIVER 606 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT), 607 #else /* XPV_HVM_DRIVER */ 608 0, 609 #endif /* XPV_HVM_DRIVER */ 610 vdp->xdf_vd_lbl, NULL)); 611 } 612 613 static void 614 xdf_io_err(buf_t *bp, int err, size_t resid) 615 { 616 bioerror(bp, err); 617 if (resid == 0) 618 bp->b_resid = bp->b_bcount; 619 biodone(bp); 620 } 621 622 static void 623 xdf_kstat_enter(xdf_t *vdp, buf_t *bp) 624 { 625 v_req_t *vreq = BP_VREQ(bp); 626 627 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 628 629 if (vdp->xdf_xdev_iostat == NULL) 630 return; 631 if ((vreq != NULL) && vreq->v_runq) { 632 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 633 } else { 634 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 635 } 636 } 637 638 static void 639 xdf_kstat_exit(xdf_t *vdp, buf_t *bp) 640 { 641 v_req_t *vreq = BP_VREQ(bp); 642 643 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 644 645 if (vdp->xdf_xdev_iostat == NULL) 646 return; 647 648 if ((vreq != NULL) && vreq->v_runq) { 649 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 650 } else { 651 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 652 } 653 654 if (bp->b_flags & B_READ) { 655 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++; 656 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount; 657 } else if (bp->b_flags & B_WRITE) { 658 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++; 659 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount; 660 } 661 } 662 663 static void 664 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) 665 { 666 v_req_t *vreq = BP_VREQ(bp); 667 668 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 669 ASSERT(!vreq->v_runq); 670 671 vreq->v_runq = B_TRUE; 672 if (vdp->xdf_xdev_iostat == NULL) 673 return; 674 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 675 } 676 677 static void 678 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) 679 { 680 v_req_t *vreq = BP_VREQ(bp); 681 682 
ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 683 ASSERT(vreq->v_runq); 684 685 vreq->v_runq = B_FALSE; 686 if (vdp->xdf_xdev_iostat == NULL) 687 return; 688 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); 689 } 690 691 int 692 xdf_kstat_create(dev_info_t *dip) 693 { 694 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 695 kstat_t *kstat; 696 buf_t *bp; 697 698 if ((kstat = kstat_create("xdf", ddi_get_instance(dip), NULL, "disk", 699 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) 700 return (-1); 701 702 /* See comment about locking in xdf_kstat_delete(). */ 703 mutex_enter(&vdp->xdf_iostat_lk); 704 mutex_enter(&vdp->xdf_dev_lk); 705 706 /* only one kstat can exist at a time */ 707 if (vdp->xdf_xdev_iostat != NULL) { 708 mutex_exit(&vdp->xdf_dev_lk); 709 mutex_exit(&vdp->xdf_iostat_lk); 710 kstat_delete(kstat); 711 return (-1); 712 } 713 714 vdp->xdf_xdev_iostat = kstat; 715 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; 716 kstat_install(vdp->xdf_xdev_iostat); 717 718 /* 719 * Now that we've created a kstat, we need to update the waitq and 720 * runq counts for the kstat to reflect our current state. 721 * 722 * For a buf_t structure to be on the runq, it must have a ring 723 * buffer slot associated with it. To get a ring buffer slot the 724 * buf must first have a v_req_t and a ge_slot_t associated with it. 725 * Then when it is granted a ring buffer slot, v_runq will be set to 726 * true. 727 * 728 * For a buf_t structure to be on the waitq, it must not be on the 729 * runq. So to find all the buf_t's that should be on waitq, we 730 * walk the active buf list and add any buf_t's which aren't on the 731 * runq to the waitq. 732 */ 733 bp = vdp->xdf_f_act; 734 while (bp != NULL) { 735 xdf_kstat_enter(vdp, bp); 736 bp = bp->av_forw; 737 } 738 if (vdp->xdf_ready_tq_bp != NULL) 739 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); 740 741 mutex_exit(&vdp->xdf_dev_lk); 742 mutex_exit(&vdp->xdf_iostat_lk); 743 return (0); 744 } 745 746 void 747 xdf_kstat_delete(dev_info_t *dip) 748 { 749 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 750 kstat_t *kstat; 751 buf_t *bp; 752 753 /* 754 * The locking order here is xdf_iostat_lk and then xdf_dev_lk. 755 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer 756 * and the contents of the our kstat. xdf_iostat_lk is used 757 * to protect the allocation and freeing of the actual kstat. 758 * xdf_dev_lk can't be used for this purpose because kstat 759 * readers use it to access the contents of the kstat and 760 * hence it can't be held when calling kstat_delete(). 761 */ 762 mutex_enter(&vdp->xdf_iostat_lk); 763 mutex_enter(&vdp->xdf_dev_lk); 764 765 if (vdp->xdf_xdev_iostat == NULL) { 766 mutex_exit(&vdp->xdf_dev_lk); 767 mutex_exit(&vdp->xdf_iostat_lk); 768 return; 769 } 770 771 /* 772 * We're about to destroy the kstat structures, so it isn't really 773 * necessary to update the runq and waitq counts. But, since this 774 * isn't a hot code path we can afford to be a little pedantic and 775 * go ahead and decrement the runq and waitq kstat counters to zero 776 * before free'ing them. This helps us ensure that we've gotten all 777 * our accounting correct. 778 * 779 * For an explanation of how we determine which buffers go on the 780 * runq vs which go on the waitq, see the comments in 781 * xdf_kstat_create(). 
	 */
	bp = vdp->xdf_f_act;
	while (bp != NULL) {
		xdf_kstat_exit(vdp, bp);
		bp = bp->av_forw;
	}
	if (vdp->xdf_ready_tq_bp != NULL)
		xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);

	kstat = vdp->xdf_xdev_iostat;
	vdp->xdf_xdev_iostat = NULL;
	mutex_exit(&vdp->xdf_dev_lk);
	kstat_delete(kstat);
	mutex_exit(&vdp->xdf_iostat_lk);
}

/*
 * Add an IO request onto the active queue.
 *
 * We have to detect IOs generated by xdf_ready_tq_thread.  These IOs
 * are used to establish a connection to the backend, so they receive
 * priority over all other IOs.  Since xdf_ready_tq_thread only does
 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
 * given time and we record the buf associated with that request in
 * xdf_ready_tq_bp.
 */
static void
xdf_bp_push(xdf_t *vdp, buf_t *bp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(bp->av_forw == NULL);

	xdf_kstat_enter(vdp, bp);

	if (curthread == vdp->xdf_ready_tq_thread) {
		/* a new IO request from the ready thread */
		ASSERT(vdp->xdf_ready_tq_bp == NULL);
		vdp->xdf_ready_tq_bp = bp;
		return;
	}

	/* this is a normal IO request */
	ASSERT(bp != vdp->xdf_ready_tq_bp);

	if (vdp->xdf_f_act == NULL) {
		/* this is the only IO on the active queue */
		ASSERT(vdp->xdf_l_act == NULL);
		ASSERT(vdp->xdf_i_act == NULL);
		vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
		return;
	}

	/* add this IO to the tail of the active queue */
	vdp->xdf_l_act->av_forw = bp;
	vdp->xdf_l_act = bp;
	if (vdp->xdf_i_act == NULL)
		vdp->xdf_i_act = bp;
}

static void
xdf_bp_pop(xdf_t *vdp, buf_t *bp)
{
	buf_t *bp_iter;

	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	ASSERT(VREQ_DONE(BP_VREQ(bp)));

	if (vdp->xdf_ready_tq_bp == bp) {
		/* we're done with a ready thread IO request */
		ASSERT(bp->av_forw == NULL);
		vdp->xdf_ready_tq_bp = NULL;
		return;
	}

	/* we're done with a normal IO request */
	ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
	ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
	ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
	ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);

	if (bp == vdp->xdf_f_act) {
		/* This IO was at the head of our active queue. */
		vdp->xdf_f_act = bp->av_forw;
		if (bp == vdp->xdf_l_act)
			vdp->xdf_l_act = NULL;
	} else {
		/* This IO finished before some other pending IOs. */
		bp_iter = vdp->xdf_f_act;
		while (bp != bp_iter->av_forw) {
			bp_iter = bp_iter->av_forw;
			ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
			ASSERT(bp_iter != vdp->xdf_i_act);
		}
		bp_iter->av_forw = bp->av_forw;
		if (bp == vdp->xdf_l_act)
			vdp->xdf_l_act = bp_iter;
	}
	bp->av_forw = NULL;
}

static buf_t *
xdf_bp_next(xdf_t *vdp)
{
	v_req_t *vreq;
	buf_t *bp;

	if (vdp->xdf_state == XD_CONNECTED) {
		/*
		 * If we're in the XD_CONNECTED state, we only service IOs
		 * from the xdf_ready_tq_thread thread.
892 */ 893 if ((bp = vdp->xdf_ready_tq_bp) == NULL) 894 return (NULL); 895 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 896 return (bp); 897 return (NULL); 898 } 899 900 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ 901 if (vdp->xdf_state != XD_READY) 902 return (NULL); 903 904 ASSERT(vdp->xdf_ready_tq_bp == NULL); 905 for (;;) { 906 if ((bp = vdp->xdf_i_act) == NULL) 907 return (NULL); 908 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) 909 return (bp); 910 911 /* advance the active buf index pointer */ 912 vdp->xdf_i_act = bp->av_forw; 913 } 914 } 915 916 static void 917 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) 918 { 919 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; 920 v_req_t *vreq = gs->gs_vreq; 921 buf_t *bp = vreq->v_buf; 922 923 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 924 ASSERT(BP_VREQ(bp) == vreq); 925 926 gs_free(gs); 927 928 if (bioerr != 0) 929 bioerror(bp, bioerr); 930 ASSERT(vreq->v_nslots > 0); 931 if (--vreq->v_nslots > 0) 932 return; 933 934 /* remove this IO from our active queue */ 935 xdf_bp_pop(vdp, bp); 936 937 ASSERT(vreq->v_runq); 938 xdf_kstat_exit(vdp, bp); 939 vreq->v_runq = B_FALSE; 940 vreq_free(vdp, vreq); 941 942 if (IS_ERROR(bp)) { 943 xdf_io_err(bp, geterror(bp), 0); 944 } else if (bp->b_resid != 0) { 945 /* Partial transfers are an error */ 946 xdf_io_err(bp, EIO, bp->b_resid); 947 } else { 948 biodone(bp); 949 } 950 } 951 952 /* 953 * xdf interrupt handler 954 */ 955 static uint_t 956 xdf_intr_locked(xdf_t *vdp) 957 { 958 xendev_ring_t *xbr; 959 blkif_response_t *resp; 960 int bioerr; 961 uint64_t id; 962 uint8_t op; 963 uint16_t status; 964 ddi_acc_handle_t acchdl; 965 966 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 967 968 if ((xbr = vdp->xdf_xb_ring) == NULL) 969 return (DDI_INTR_UNCLAIMED); 970 971 acchdl = vdp->xdf_xb_ring_hdl; 972 973 /* 974 * complete all requests which have a response 975 */ 976 while (resp = xvdi_ring_get_response(xbr)) { 977 id = ddi_get64(acchdl, &resp->id); 978 op = ddi_get8(acchdl, &resp->operation); 979 status = ddi_get16(acchdl, (uint16_t *)&resp->status); 980 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", 981 op, id, status)); 982 983 if (status != BLKIF_RSP_OKAY) { 984 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", 985 vdp->xdf_addr, 986 (op == BLKIF_OP_READ) ? "reading" : "writing")); 987 bioerr = EIO; 988 } else { 989 bioerr = 0; 990 } 991 992 xdf_io_fini(vdp, id, bioerr); 993 } 994 return (DDI_INTR_CLAIMED); 995 } 996 997 /* 998 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and 999 * block at a lower pil. 
1000 */ 1001 static uint_t 1002 xdf_intr(caddr_t arg) 1003 { 1004 xdf_t *vdp = (xdf_t *)arg; 1005 int rv; 1006 1007 mutex_enter(&vdp->xdf_dev_lk); 1008 rv = xdf_intr_locked(vdp); 1009 mutex_exit(&vdp->xdf_dev_lk); 1010 1011 if (!do_polled_io) 1012 xdf_io_start(vdp); 1013 1014 return (rv); 1015 } 1016 1017 static void 1018 xdf_ring_push(xdf_t *vdp) 1019 { 1020 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1021 1022 if (vdp->xdf_xb_ring == NULL) 1023 return; 1024 1025 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { 1026 DPRINTF(IO_DBG, ( 1027 "xdf@%s: xdf_ring_push: sent request(s) to backend\n", 1028 vdp->xdf_addr)); 1029 } 1030 1031 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) 1032 xvdi_notify_oe(vdp->xdf_dip); 1033 } 1034 1035 static int 1036 xdf_ring_drain_locked(xdf_t *vdp) 1037 { 1038 int pollc, rv = 0; 1039 1040 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1041 1042 if (xdf_debug & SUSRES_DBG) 1043 xen_printf("xdf_ring_drain: start\n"); 1044 1045 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { 1046 if (vdp->xdf_xb_ring == NULL) 1047 goto out; 1048 1049 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1050 (void) xdf_intr_locked(vdp); 1051 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) 1052 goto out; 1053 xdf_ring_push(vdp); 1054 1055 /* file-backed devices can be slow */ 1056 mutex_exit(&vdp->xdf_dev_lk); 1057 #ifdef XPV_HVM_DRIVER 1058 (void) HYPERVISOR_yield(); 1059 #endif /* XPV_HVM_DRIVER */ 1060 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); 1061 mutex_enter(&vdp->xdf_dev_lk); 1062 } 1063 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); 1064 1065 out: 1066 if (vdp->xdf_xb_ring != NULL) { 1067 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || 1068 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) 1069 rv = EIO; 1070 } 1071 if (xdf_debug & SUSRES_DBG) 1072 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", 1073 vdp->xdf_addr, rv); 1074 return (rv); 1075 } 1076 1077 static int 1078 xdf_ring_drain(xdf_t *vdp) 1079 { 1080 int rv; 1081 mutex_enter(&vdp->xdf_dev_lk); 1082 rv = xdf_ring_drain_locked(vdp); 1083 mutex_exit(&vdp->xdf_dev_lk); 1084 return (rv); 1085 } 1086 1087 /* 1088 * Destroy all v_req_t, grant table entries, and our ring buffer. 1089 */ 1090 static void 1091 xdf_ring_destroy(xdf_t *vdp) 1092 { 1093 v_req_t *vreq; 1094 buf_t *bp; 1095 ge_slot_t *gs; 1096 1097 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1098 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1099 1100 if ((vdp->xdf_state != XD_INIT) && 1101 (vdp->xdf_state != XD_CONNECTED) && 1102 (vdp->xdf_state != XD_READY)) { 1103 ASSERT(vdp->xdf_xb_ring == NULL); 1104 ASSERT(vdp->xdf_xb_ring_hdl == NULL); 1105 ASSERT(vdp->xdf_peer == INVALID_DOMID); 1106 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); 1107 ASSERT(list_is_empty(&vdp->xdf_vreq_act)); 1108 return; 1109 } 1110 1111 /* 1112 * We don't want to receive async notifications from the backend 1113 * when it finishes processing ring entries. 1114 */ 1115 #ifdef XPV_HVM_DRIVER 1116 ec_unbind_evtchn(vdp->xdf_evtchn); 1117 #else /* !XPV_HVM_DRIVER */ 1118 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1119 #endif /* !XPV_HVM_DRIVER */ 1120 1121 /* 1122 * Drain any requests in the ring. We need to do this before we 1123 * can free grant table entries, because if active ring entries 1124 * point to grants, then the backend could be trying to access 1125 * those grants. 
1126 */ 1127 (void) xdf_ring_drain_locked(vdp); 1128 1129 /* We're done talking to the backend so free up our event channel */ 1130 xvdi_free_evtchn(vdp->xdf_dip); 1131 vdp->xdf_evtchn = INVALID_EVTCHN; 1132 1133 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { 1134 bp = vreq->v_buf; 1135 ASSERT(BP_VREQ(bp) == vreq); 1136 1137 /* Free up any grant table entries associaed with this IO */ 1138 while ((gs = list_head(&vreq->v_gs)) != NULL) 1139 gs_free(gs); 1140 1141 /* If this IO was on the runq, move it back to the waitq. */ 1142 if (vreq->v_runq) 1143 xdf_kstat_runq_to_waitq(vdp, bp); 1144 1145 /* 1146 * Reset any buf IO state since we're going to re-issue the 1147 * IO when we reconnect. 1148 */ 1149 vreq_free(vdp, vreq); 1150 BP_VREQ_SET(bp, NULL); 1151 bioerror(bp, 0); 1152 } 1153 1154 /* reset the active queue index pointer */ 1155 vdp->xdf_i_act = vdp->xdf_f_act; 1156 1157 /* Destroy the ring */ 1158 xvdi_free_ring(vdp->xdf_xb_ring); 1159 vdp->xdf_xb_ring = NULL; 1160 vdp->xdf_xb_ring_hdl = NULL; 1161 vdp->xdf_peer = INVALID_DOMID; 1162 } 1163 1164 void 1165 xdfmin(struct buf *bp) 1166 { 1167 if (bp->b_bcount > xdf_maxphys) 1168 bp->b_bcount = xdf_maxphys; 1169 } 1170 1171 /* 1172 * Check if we have a pending "eject" media request. 1173 */ 1174 static int 1175 xdf_eject_pending(xdf_t *vdp) 1176 { 1177 dev_info_t *dip = vdp->xdf_dip; 1178 char *xsname, *str; 1179 1180 if (!vdp->xdf_media_req_supported) 1181 return (B_FALSE); 1182 1183 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1184 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) 1185 return (B_FALSE); 1186 1187 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { 1188 strfree(str); 1189 return (B_FALSE); 1190 } 1191 strfree(str); 1192 return (B_TRUE); 1193 } 1194 1195 /* 1196 * Generate a media request. 1197 */ 1198 static int 1199 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) 1200 { 1201 dev_info_t *dip = vdp->xdf_dip; 1202 char *xsname; 1203 1204 /* 1205 * we can't be holding xdf_dev_lk because xenbus_printf() can 1206 * block while waiting for a PIL 1 interrupt message. this 1207 * would cause a deadlock with xdf_intr() which needs to grab 1208 * xdf_dev_lk as well and runs at PIL 5. 
1209 */ 1210 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1211 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1212 1213 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1214 return (ENXIO); 1215 1216 /* Check if we support media requests */ 1217 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) 1218 return (ENOTTY); 1219 1220 /* If an eject is pending then don't allow any new requests */ 1221 if (xdf_eject_pending(vdp)) 1222 return (ENXIO); 1223 1224 /* Make sure that there is media present */ 1225 if (media_required && (vdp->xdf_xdev_nblocks == 0)) 1226 return (ENXIO); 1227 1228 /* We only allow operations when the device is ready and connected */ 1229 if (vdp->xdf_state != XD_READY) 1230 return (EIO); 1231 1232 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) 1233 return (EIO); 1234 1235 return (0); 1236 } 1237 1238 /* 1239 * populate a single blkif_request_t w/ a buf 1240 */ 1241 static void 1242 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) 1243 { 1244 grant_ref_t gr; 1245 uint8_t fsect, lsect; 1246 size_t bcnt; 1247 paddr_t dma_addr; 1248 off_t blk_off; 1249 dev_info_t *dip = vdp->xdf_dip; 1250 blkif_vdev_t vdev = xvdi_get_vdevnum(dip); 1251 v_req_t *vreq = BP_VREQ(bp); 1252 uint64_t blkno = vreq->v_blkno; 1253 uint_t ndmacs = vreq->v_ndmacs; 1254 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; 1255 int seg = 0; 1256 int isread = IS_READ(bp); 1257 ge_slot_t *gs = list_head(&vreq->v_gs); 1258 1259 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1260 ASSERT(vreq->v_status == VREQ_GS_ALLOCED); 1261 1262 if (isread) 1263 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); 1264 else { 1265 switch (vreq->v_flush_diskcache) { 1266 case FLUSH_DISKCACHE: 1267 ddi_put8(acchdl, &rreq->operation, 1268 BLKIF_OP_FLUSH_DISKCACHE); 1269 ddi_put16(acchdl, &rreq->handle, vdev); 1270 ddi_put64(acchdl, &rreq->id, 1271 (uint64_t)(uintptr_t)(gs)); 1272 ddi_put8(acchdl, &rreq->nr_segments, 0); 1273 vreq->v_status = VREQ_DMAWIN_DONE; 1274 return; 1275 case WRITE_BARRIER: 1276 ddi_put8(acchdl, &rreq->operation, 1277 BLKIF_OP_WRITE_BARRIER); 1278 break; 1279 default: 1280 if (!vdp->xdf_wce) 1281 ddi_put8(acchdl, &rreq->operation, 1282 BLKIF_OP_WRITE_BARRIER); 1283 else 1284 ddi_put8(acchdl, &rreq->operation, 1285 BLKIF_OP_WRITE); 1286 break; 1287 } 1288 } 1289 1290 ddi_put16(acchdl, &rreq->handle, vdev); 1291 ddi_put64(acchdl, &rreq->sector_number, blkno); 1292 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); 1293 1294 /* 1295 * loop until all segments are populated or no more dma cookie in buf 1296 */ 1297 for (;;) { 1298 /* 1299 * Each segment of a blkif request can transfer up to 1300 * one 4K page of data. 
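		 *
		 * For example, with 512-byte (XB_BSIZE) sectors, a DMA
		 * cookie that starts at page offset 0x600 and is 1024 bytes
		 * long yields blk_off = 0x600, fsect = 3 and lsect = 4 in
		 * the computation below.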
		 */
		bcnt = vreq->v_dmac.dmac_size;
		dma_addr = vreq->v_dmac.dmac_laddress;
		blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
		fsect = blk_off >> XB_BSHIFT;
		lsect = fsect + (bcnt >> XB_BSHIFT) - 1;

		ASSERT(bcnt <= PAGESIZE);
		ASSERT((bcnt % XB_BSIZE) == 0);
		ASSERT((blk_off & XB_BMASK) == 0);
		ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
		    lsect < XB_MAX_SEGLEN / XB_BSIZE);

		gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
		ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
		ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
		ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);

		DPRINTF(IO_DBG, (
		    "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
		    vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
		DPRINTF(IO_DBG, (
		    "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
		    vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));

		blkno += (bcnt >> XB_BSHIFT);
		seg++;
		ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		if (--ndmacs) {
			ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
			continue;
		}

		vreq->v_status = VREQ_DMAWIN_DONE;
		vreq->v_blkno = blkno;
		break;
	}
	ddi_put8(acchdl, &rreq->nr_segments, seg);
	DPRINTF(IO_DBG, (
	    "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
	    vdp->xdf_addr, rreq->id));
}

static void
xdf_io_start(xdf_t *vdp)
{
	struct buf *bp;
	v_req_t *vreq;
	blkif_request_t *rreq;
	boolean_t rreqready = B_FALSE;

	mutex_enter(&vdp->xdf_dev_lk);

	/*
	 * Populate the ring request(s).  Loop until there is no buf to
	 * transfer or no free slot available in the I/O ring.
	 */
	for (;;) {
		/* don't start any new IO if we're suspending */
		if (vdp->xdf_suspending)
			break;
		if ((bp = xdf_bp_next(vdp)) == NULL)
			break;

		/* if the buf doesn't already have a vreq, allocate one */
		if (((vreq = BP_VREQ(bp)) == NULL) &&
		    ((vreq = vreq_get(vdp, bp)) == NULL))
			break;

		/* alloc DMA/GTE resources */
		if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
			break;

		/* get next blkif_request in the ring */
		if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
			break;
		bzero(rreq, sizeof (blkif_request_t));
		rreqready = B_TRUE;

		/* populate blkif_request with this buf */
		xdf_process_rreq(vdp, bp, rreq);

		/*
		 * This buffer/vreq pair has been allocated ring buffer
		 * resources, so if it isn't already in our runq, add it.
		 */
		if (!vreq->v_runq)
			xdf_kstat_waitq_to_runq(vdp, bp);
	}

	/* Send the request(s) to the backend */
	if (rreqready)
		xdf_ring_push(vdp);

	mutex_exit(&vdp->xdf_dev_lk);
}


/* check if partition is open, -1 - check all partitions on the disk */
static boolean_t
xdf_isopen(xdf_t *vdp, int partition)
{
	int i;
	ulong_t parbit;
	boolean_t rval = B_FALSE;

	ASSERT((partition == -1) ||
	    ((partition >= 0) && (partition < XDF_PEXT)));

	if (partition == -1)
		parbit = (ulong_t)-1;
	else
		parbit = 1 << partition;

	for (i = 0; i < OTYPCNT; i++) {
		if (vdp->xdf_vd_open[i] & parbit)
			rval = B_TRUE;
	}

	return (rval);
}

/*
 * The connection should never be closed as long as someone is holding
 * us open, there is pending IO, or someone is waiting for a
 * connection.
 */
static boolean_t
xdf_busy(xdf_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if ((vdp->xdf_xb_ring != NULL) &&
	    xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	if (xdf_isopen(vdp, -1)) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	if (vdp->xdf_connect_req > 0) {
		ASSERT(vdp->xdf_state != XD_CLOSED);
		return (B_TRUE);
	}

	return (B_FALSE);
}

static void
xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
{
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
	DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
	    vdp->xdf_addr, vdp->xdf_state, new_state));
	vdp->xdf_state = new_state;
	cv_broadcast(&vdp->xdf_dev_cv);
}

static void
xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
{
	dev_info_t *dip = vdp->xdf_dip;
	boolean_t busy;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
	ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));

	/* Check if we're already there. */
	if (vdp->xdf_state == new_state)
		return;

	mutex_enter(&vdp->xdf_dev_lk);
	busy = xdf_busy(vdp);

	/* If we're already closed then there's nothing to do. */
	if (vdp->xdf_state == XD_CLOSED) {
		ASSERT(!busy);
		xdf_set_state(vdp, new_state);
		mutex_exit(&vdp->xdf_dev_lk);
		return;
	}

#ifdef DEBUG
	/* UhOh.  Warn the user that something bad has happened. */
	if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
	    (vdp->xdf_xdev_nblocks != 0)) {
		cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
		    vdp->xdf_addr);
	}
#endif /* DEBUG */

	xdf_ring_destroy(vdp);

	/* If we're busy then we can only go into the unknown state */
	xdf_set_state(vdp, (busy) ?
XD_UNKNOWN : new_state); 1506 mutex_exit(&vdp->xdf_dev_lk); 1507 1508 /* if we're closed now, let the other end know */ 1509 if (vdp->xdf_state == XD_CLOSED) 1510 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1511 } 1512 1513 1514 /* 1515 * Kick-off connect process 1516 * Status should be XD_UNKNOWN or XD_CLOSED 1517 * On success, status will be changed to XD_INIT 1518 * On error, it will be changed to XD_UNKNOWN 1519 */ 1520 static int 1521 xdf_setstate_init(xdf_t *vdp) 1522 { 1523 dev_info_t *dip = vdp->xdf_dip; 1524 xenbus_transaction_t xbt; 1525 grant_ref_t gref; 1526 char *xsname, *str; 1527 int rv; 1528 1529 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1530 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1531 ASSERT((vdp->xdf_state == XD_UNKNOWN) || 1532 (vdp->xdf_state == XD_CLOSED)); 1533 1534 DPRINTF(DDI_DBG, 1535 ("xdf@%s: starting connection process\n", vdp->xdf_addr)); 1536 1537 /* 1538 * If an eject is pending then don't allow a new connection. 1539 * (Only the backend can clear media request eject request.) 1540 */ 1541 if (xdf_eject_pending(vdp)) 1542 return (DDI_FAILURE); 1543 1544 if ((xsname = xvdi_get_xsname(dip)) == NULL) 1545 goto errout; 1546 1547 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) 1548 goto errout; 1549 1550 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); 1551 1552 /* 1553 * Sanity check for the existance of the xenbus device-type property. 1554 * This property might not exist if our xenbus device nodes were 1555 * force destroyed while we were still connected to the backend. 1556 */ 1557 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) 1558 goto errout; 1559 strfree(str); 1560 1561 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) 1562 goto errout; 1563 1564 vdp->xdf_evtchn = xvdi_get_evtchn(dip); 1565 #ifdef XPV_HVM_DRIVER 1566 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); 1567 #else /* !XPV_HVM_DRIVER */ 1568 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != 1569 DDI_SUCCESS) { 1570 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " 1571 "failed to add intr handler", vdp->xdf_addr); 1572 goto errout1; 1573 } 1574 #endif /* !XPV_HVM_DRIVER */ 1575 1576 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE, 1577 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != 1578 DDI_SUCCESS) { 1579 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", 1580 vdp->xdf_addr); 1581 goto errout2; 1582 } 1583 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */ 1584 1585 /* 1586 * Write into xenstore the info needed by backend 1587 */ 1588 trans_retry: 1589 if (xenbus_transaction_start(&xbt)) { 1590 cmn_err(CE_WARN, "xdf@%s: failed to start transaction", 1591 vdp->xdf_addr); 1592 xvdi_fatal_error(dip, EIO, "connect transaction init"); 1593 goto fail_trans; 1594 } 1595 1596 /* 1597 * XBP_PROTOCOL is written by the domain builder in the case of PV 1598 * domains. However, it is not written for HVM domains, so let's 1599 * write it here. 
1600 */ 1601 if (((rv = xenbus_printf(xbt, xsname, 1602 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || 1603 ((rv = xenbus_printf(xbt, xsname, 1604 XBP_RING_REF, "%u", gref)) != 0) || 1605 ((rv = xenbus_printf(xbt, xsname, 1606 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || 1607 ((rv = xenbus_printf(xbt, xsname, 1608 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || 1609 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { 1610 (void) xenbus_transaction_end(xbt, 1); 1611 xvdi_fatal_error(dip, rv, "connect transaction setup"); 1612 goto fail_trans; 1613 } 1614 1615 /* kick-off connect process */ 1616 if (rv = xenbus_transaction_end(xbt, 0)) { 1617 if (rv == EAGAIN) 1618 goto trans_retry; 1619 xvdi_fatal_error(dip, rv, "connect transaction commit"); 1620 goto fail_trans; 1621 } 1622 1623 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1624 mutex_enter(&vdp->xdf_dev_lk); 1625 xdf_set_state(vdp, XD_INIT); 1626 mutex_exit(&vdp->xdf_dev_lk); 1627 1628 return (DDI_SUCCESS); 1629 1630 fail_trans: 1631 xvdi_free_ring(vdp->xdf_xb_ring); 1632 errout2: 1633 #ifdef XPV_HVM_DRIVER 1634 ec_unbind_evtchn(vdp->xdf_evtchn); 1635 #else /* !XPV_HVM_DRIVER */ 1636 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); 1637 #endif /* !XPV_HVM_DRIVER */ 1638 errout1: 1639 xvdi_free_evtchn(dip); 1640 vdp->xdf_evtchn = INVALID_EVTCHN; 1641 errout: 1642 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1643 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", 1644 vdp->xdf_addr); 1645 return (DDI_FAILURE); 1646 } 1647 1648 int 1649 xdf_get_flush_block(xdf_t *vdp) 1650 { 1651 /* 1652 * Get a DEV_BSIZE aligned bufer 1653 */ 1654 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); 1655 vdp->xdf_cache_flush_block = 1656 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), 1657 (int)vdp->xdf_xdev_secsize); 1658 1659 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, 1660 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) 1661 return (DDI_FAILURE); 1662 return (DDI_SUCCESS); 1663 } 1664 1665 static void 1666 xdf_setstate_ready(void *arg) 1667 { 1668 xdf_t *vdp = (xdf_t *)arg; 1669 1670 vdp->xdf_ready_tq_thread = curthread; 1671 1672 /* 1673 * We've created all the minor nodes via cmlb_attach() using default 1674 * value in xdf_attach() to make it possible to block in xdf_open(), 1675 * in case there's anyone (say, booting thread) ever trying to open 1676 * it before connected to backend. We will refresh all those minor 1677 * nodes w/ latest info we've got now when we are almost connected. 1678 */ 1679 mutex_enter(&vdp->xdf_dev_lk); 1680 if (vdp->xdf_cmbl_reattach) { 1681 vdp->xdf_cmbl_reattach = B_FALSE; 1682 1683 mutex_exit(&vdp->xdf_dev_lk); 1684 if (xdf_cmlb_attach(vdp) != 0) { 1685 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1686 return; 1687 } 1688 mutex_enter(&vdp->xdf_dev_lk); 1689 } 1690 1691 /* If we're not still trying to get to the ready state, then bail. */ 1692 if (vdp->xdf_state != XD_CONNECTED) { 1693 mutex_exit(&vdp->xdf_dev_lk); 1694 return; 1695 } 1696 mutex_exit(&vdp->xdf_dev_lk); 1697 1698 /* 1699 * If backend has feature-barrier, see if it supports disk 1700 * cache flush op. 1701 */ 1702 vdp->xdf_flush_supported = B_FALSE; 1703 if (vdp->xdf_feature_barrier) { 1704 /* 1705 * Pretend we already know flush is supported so probe 1706 * will attempt the correct op. 
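		 * (With xdf_flush_supported set, the zero-length write
		 * issued by xdf_lb_rdwr() below is treated as a cache-flush
		 * request; see IS_FLUSH_DISKCACHE() and the FLUSH_DISKCACHE
		 * handling in vreq_setup() and xdf_process_rreq().)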
1707 */ 1708 vdp->xdf_flush_supported = B_TRUE; 1709 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { 1710 vdp->xdf_flush_supported = B_TRUE; 1711 } else { 1712 vdp->xdf_flush_supported = B_FALSE; 1713 /* 1714 * If the other end does not support the cache flush op 1715 * then we must use a barrier-write to force disk 1716 * cache flushing. Barrier writes require that a data 1717 * block actually be written. 1718 * Cache a block to barrier-write when we are 1719 * asked to perform a flush. 1720 * XXX - would it be better to just copy 1 block 1721 * (512 bytes) from whatever write we did last 1722 * and rewrite that block? 1723 */ 1724 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { 1725 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1726 return; 1727 } 1728 } 1729 } 1730 1731 mutex_enter(&vdp->xdf_cb_lk); 1732 mutex_enter(&vdp->xdf_dev_lk); 1733 if (vdp->xdf_state == XD_CONNECTED) 1734 xdf_set_state(vdp, XD_READY); 1735 mutex_exit(&vdp->xdf_dev_lk); 1736 1737 /* Restart any currently queued up io */ 1738 xdf_io_start(vdp); 1739 1740 mutex_exit(&vdp->xdf_cb_lk); 1741 } 1742 1743 /* 1744 * synthetic geometry 1745 */ 1746 #define XDF_NSECTS 256 1747 #define XDF_NHEADS 16 1748 1749 static void 1750 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) 1751 { 1752 xdf_t *vdp; 1753 uint_t ncyl; 1754 1755 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 1756 1757 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); 1758 1759 bzero(geomp, sizeof (*geomp)); 1760 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; 1761 geomp->g_acyl = 0; 1762 geomp->g_nhead = XDF_NHEADS; 1763 geomp->g_nsect = XDF_NSECTS; 1764 geomp->g_secsize = vdp->xdf_xdev_secsize; 1765 geomp->g_capacity = vdp->xdf_xdev_nblocks; 1766 geomp->g_intrlv = 0; 1767 geomp->g_rpm = 7200; 1768 } 1769 1770 /* 1771 * Finish other initialization after we've connected to backend 1772 * Status should be XD_INIT before calling this routine 1773 * On success, status should be changed to XD_CONNECTED. 1774 * On error, status should stay XD_INIT 1775 */ 1776 static int 1777 xdf_setstate_connected(xdf_t *vdp) 1778 { 1779 dev_info_t *dip = vdp->xdf_dip; 1780 cmlb_geom_t pgeom; 1781 diskaddr_t nblocks = 0; 1782 uint_t secsize = 0; 1783 char *oename, *xsname, *str; 1784 uint_t dinfo; 1785 1786 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1787 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); 1788 ASSERT(vdp->xdf_state == XD_INIT); 1789 1790 if (((xsname = xvdi_get_xsname(dip)) == NULL) || 1791 ((oename = xvdi_get_oename(dip)) == NULL)) 1792 return (DDI_FAILURE); 1793 1794 /* Make sure the other end is XenbusStateConnected */ 1795 if (xenbus_read_driver_state(oename) != XenbusStateConnected) 1796 return (DDI_FAILURE); 1797 1798 /* Determine if feature barrier is supported by backend */ 1799 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) 1800 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported", 1801 vdp->xdf_addr); 1802 1803 /* 1804 * Probe backend. Read the device size into xdf_xdev_nblocks 1805 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE 1806 * flags in xdf_dinfo. If the emulated device type is "cdrom", 1807 * we always set VDISK_CDROM, regardless of if it's present in 1808 * the xenbus info parameter. 
1809 */ 1810 if (xenbus_gather(XBT_NULL, oename, 1811 XBP_SECTORS, "%"SCNu64, &nblocks, 1812 XBP_SECTOR_SIZE, "%u", &secsize, 1813 XBP_INFO, "%u", &dinfo, 1814 NULL) != 0) { 1815 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1816 "cannot read backend info", vdp->xdf_addr); 1817 return (DDI_FAILURE); 1818 } 1819 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 1820 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 1821 vdp->xdf_addr); 1822 return (DDI_FAILURE); 1823 } 1824 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 1825 dinfo |= VDISK_CDROM; 1826 strfree(str); 1827 1828 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) 1829 secsize = DEV_BSIZE; 1830 vdp->xdf_xdev_nblocks = nblocks; 1831 vdp->xdf_xdev_secsize = secsize; 1832 #ifdef _ILP32 1833 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { 1834 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " 1835 "backend disk device too large with %llu blocks for" 1836 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); 1837 xvdi_fatal_error(dip, EFBIG, "reading backend info"); 1838 return (DDI_FAILURE); 1839 } 1840 #endif 1841 1842 /* 1843 * If the physical geometry for a fixed disk has been explicity 1844 * set then make sure that the specified physical geometry isn't 1845 * larger than the device we connected to. 1846 */ 1847 if (vdp->xdf_pgeom_fixed && 1848 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { 1849 cmn_err(CE_WARN, 1850 "xdf@%s: connect failed, fixed geometry too large", 1851 vdp->xdf_addr); 1852 return (DDI_FAILURE); 1853 } 1854 1855 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); 1856 1857 /* mark vbd is ready for I/O */ 1858 mutex_enter(&vdp->xdf_dev_lk); 1859 xdf_set_state(vdp, XD_CONNECTED); 1860 1861 /* check if the cmlb label should be updated */ 1862 xdf_synthetic_pgeom(dip, &pgeom); 1863 if ((vdp->xdf_dinfo != dinfo) || 1864 (!vdp->xdf_pgeom_fixed && 1865 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { 1866 vdp->xdf_cmbl_reattach = B_TRUE; 1867 1868 vdp->xdf_dinfo = dinfo; 1869 if (!vdp->xdf_pgeom_fixed) 1870 vdp->xdf_pgeom = pgeom; 1871 } 1872 1873 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { 1874 if (vdp->xdf_xdev_nblocks == 0) { 1875 vdp->xdf_mstate = DKIO_EJECTED; 1876 cv_broadcast(&vdp->xdf_mstate_cv); 1877 } else { 1878 vdp->xdf_mstate = DKIO_INSERTED; 1879 cv_broadcast(&vdp->xdf_mstate_cv); 1880 } 1881 } else { 1882 if (vdp->xdf_mstate != DKIO_NONE) { 1883 vdp->xdf_mstate = DKIO_NONE; 1884 cv_broadcast(&vdp->xdf_mstate_cv); 1885 } 1886 } 1887 1888 mutex_exit(&vdp->xdf_dev_lk); 1889 1890 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, 1891 (uint64_t)vdp->xdf_xdev_nblocks); 1892 1893 /* Restart any currently queued up io */ 1894 xdf_io_start(vdp); 1895 1896 /* 1897 * To get to the ready state we have to do IO to the backend device, 1898 * but we can't initiate IO from the other end change callback thread 1899 * (which is the current context we're executing in.) This is because 1900 * if the other end disconnects while we're doing IO from the callback 1901 * thread, then we can't receive that disconnect event and we hang 1902 * waiting for an IO that can never complete. 
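	 * Instead we hand this work off to the xdf_ready_tq taskq below;
	 * xdf_setstate_ready() then does the label and flush probing
	 * through the synchronous xdf_lb_rdwr() path.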
1903 */ 1904 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, 1905 DDI_SLEEP); 1906 1907 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1908 return (DDI_SUCCESS); 1909 } 1910 1911 /*ARGSUSED*/ 1912 static void 1913 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) 1914 { 1915 XenbusState new_state = *(XenbusState *)impl_data; 1916 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 1917 1918 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", 1919 vdp->xdf_addr, new_state)); 1920 1921 mutex_enter(&vdp->xdf_cb_lk); 1922 1923 /* We assume that this callback is single threaded */ 1924 ASSERT(vdp->xdf_oe_change_thread == NULL); 1925 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); 1926 1927 /* ignore any backend state changes if we're suspending/suspended */ 1928 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { 1929 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1930 mutex_exit(&vdp->xdf_cb_lk); 1931 return; 1932 } 1933 1934 switch (new_state) { 1935 case XenbusStateUnknown: 1936 case XenbusStateInitialising: 1937 case XenbusStateInitWait: 1938 case XenbusStateInitialised: 1939 if (vdp->xdf_state == XD_INIT) 1940 break; 1941 1942 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1943 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1944 break; 1945 ASSERT(vdp->xdf_state == XD_INIT); 1946 break; 1947 1948 case XenbusStateConnected: 1949 if ((vdp->xdf_state == XD_CONNECTED) || 1950 (vdp->xdf_state == XD_READY)) 1951 break; 1952 1953 if (vdp->xdf_state != XD_INIT) { 1954 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1955 if (xdf_setstate_init(vdp) != DDI_SUCCESS) 1956 break; 1957 ASSERT(vdp->xdf_state == XD_INIT); 1958 } 1959 1960 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { 1961 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); 1962 break; 1963 } 1964 ASSERT(vdp->xdf_state == XD_CONNECTED); 1965 break; 1966 1967 case XenbusStateClosing: 1968 if (xdf_isopen(vdp, -1)) { 1969 cmn_err(CE_NOTE, 1970 "xdf@%s: hot-unplug failed, still in use", 1971 vdp->xdf_addr); 1972 break; 1973 } 1974 /*FALLTHROUGH*/ 1975 case XenbusStateClosed: 1976 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 1977 break; 1978 } 1979 1980 /* notify anybody waiting for oe state change */ 1981 cv_broadcast(&vdp->xdf_dev_cv); 1982 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); 1983 mutex_exit(&vdp->xdf_cb_lk); 1984 } 1985 1986 static int 1987 xdf_connect_locked(xdf_t *vdp, boolean_t wait) 1988 { 1989 int rv, timeouts = 0, reset = 20; 1990 1991 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 1992 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 1993 1994 /* we can't connect once we're in the closed state */ 1995 if (vdp->xdf_state == XD_CLOSED) 1996 return (XD_CLOSED); 1997 1998 vdp->xdf_connect_req++; 1999 while (vdp->xdf_state != XD_READY) { 2000 mutex_exit(&vdp->xdf_dev_lk); 2001 2002 /* only one thread at a time can be the connection thread */ 2003 if (vdp->xdf_connect_thread == NULL) 2004 vdp->xdf_connect_thread = curthread; 2005 2006 if (vdp->xdf_connect_thread == curthread) { 2007 if ((timeouts > 0) && ((timeouts % reset) == 0)) { 2008 /* 2009 * If we haven't establised a connection 2010 * within the reset time, then disconnect 2011 * so we can try again, and double the reset 2012 * time. The reset time starts at 2 sec. 
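				 * Each timeout below is one 0.1 sec
				 * cv_reltimedwait_sig() interval and reset
				 * starts at 20, so the first reset happens
				 * after 20 * 0.1 = 2 sec, then 4 sec,
				 * 8 sec, and so on.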
2013 */ 2014 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2015 reset *= 2; 2016 } 2017 if (vdp->xdf_state == XD_UNKNOWN) 2018 (void) xdf_setstate_init(vdp); 2019 if (vdp->xdf_state == XD_INIT) 2020 (void) xdf_setstate_connected(vdp); 2021 } 2022 2023 mutex_enter(&vdp->xdf_dev_lk); 2024 if (!wait || (vdp->xdf_state == XD_READY)) 2025 goto out; 2026 2027 mutex_exit((&vdp->xdf_cb_lk)); 2028 if (vdp->xdf_connect_thread != curthread) { 2029 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); 2030 } else { 2031 /* delay for 0.1 sec */ 2032 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv, 2033 &vdp->xdf_dev_lk, drv_usectohz(100*1000), 2034 TR_CLOCK_TICK); 2035 if (rv == -1) 2036 timeouts++; 2037 } 2038 mutex_exit((&vdp->xdf_dev_lk)); 2039 mutex_enter((&vdp->xdf_cb_lk)); 2040 mutex_enter((&vdp->xdf_dev_lk)); 2041 if (rv == 0) 2042 goto out; 2043 } 2044 2045 out: 2046 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2047 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); 2048 2049 if (vdp->xdf_connect_thread == curthread) { 2050 /* 2051 * wake up someone else so they can become the connection 2052 * thread. 2053 */ 2054 cv_signal(&vdp->xdf_dev_cv); 2055 vdp->xdf_connect_thread = NULL; 2056 } 2057 2058 /* Try to lock the media */ 2059 mutex_exit((&vdp->xdf_dev_lk)); 2060 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2061 mutex_enter((&vdp->xdf_dev_lk)); 2062 2063 vdp->xdf_connect_req--; 2064 return (vdp->xdf_state); 2065 } 2066 2067 static uint_t 2068 xdf_iorestart(caddr_t arg) 2069 { 2070 xdf_t *vdp = (xdf_t *)arg; 2071 2072 ASSERT(vdp != NULL); 2073 2074 mutex_enter(&vdp->xdf_dev_lk); 2075 ASSERT(ISDMACBON(vdp)); 2076 SETDMACBOFF(vdp); 2077 mutex_exit(&vdp->xdf_dev_lk); 2078 2079 xdf_io_start(vdp); 2080 2081 return (DDI_INTR_CLAIMED); 2082 } 2083 2084 #ifdef XPV_HVM_DRIVER 2085 2086 typedef struct xdf_hvm_entry { 2087 list_node_t xdf_he_list; 2088 char *xdf_he_path; 2089 dev_info_t *xdf_he_dip; 2090 } xdf_hvm_entry_t; 2091 2092 static list_t xdf_hvm_list; 2093 static kmutex_t xdf_hvm_list_lock; 2094 2095 static xdf_hvm_entry_t * 2096 i_xdf_hvm_find(const char *path, dev_info_t *dip) 2097 { 2098 xdf_hvm_entry_t *i; 2099 2100 ASSERT((path != NULL) || (dip != NULL)); 2101 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); 2102 2103 i = list_head(&xdf_hvm_list); 2104 while (i != NULL) { 2105 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { 2106 i = list_next(&xdf_hvm_list, i); 2107 continue; 2108 } 2109 if ((dip != NULL) && (i->xdf_he_dip != dip)) { 2110 i = list_next(&xdf_hvm_list, i); 2111 continue; 2112 } 2113 break; 2114 } 2115 return (i); 2116 } 2117 2118 dev_info_t * 2119 xdf_hvm_hold(const char *path) 2120 { 2121 xdf_hvm_entry_t *i; 2122 dev_info_t *dip; 2123 2124 mutex_enter(&xdf_hvm_list_lock); 2125 i = i_xdf_hvm_find(path, NULL); 2126 if (i == NULL) { 2127 mutex_exit(&xdf_hvm_list_lock); 2128 return (B_FALSE); 2129 } 2130 ndi_hold_devi(dip = i->xdf_he_dip); 2131 mutex_exit(&xdf_hvm_list_lock); 2132 return (dip); 2133 } 2134 2135 static void 2136 xdf_hvm_add(dev_info_t *dip) 2137 { 2138 xdf_hvm_entry_t *i; 2139 char *path; 2140 2141 /* figure out the path for the dip */ 2142 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 2143 (void) ddi_pathname(dip, path); 2144 2145 i = kmem_alloc(sizeof (*i), KM_SLEEP); 2146 i->xdf_he_dip = dip; 2147 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); 2148 2149 mutex_enter(&xdf_hvm_list_lock); 2150 ASSERT(i_xdf_hvm_find(path, NULL) == NULL); 2151 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); 2152 list_insert_head(&xdf_hvm_list, i); 2153 mutex_exit(&xdf_hvm_list_lock); 2154 2155 
kmem_free(path, MAXPATHLEN); 2156 } 2157 2158 static void 2159 xdf_hvm_rm(dev_info_t *dip) 2160 { 2161 xdf_hvm_entry_t *i; 2162 2163 mutex_enter(&xdf_hvm_list_lock); 2164 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); 2165 list_remove(&xdf_hvm_list, i); 2166 mutex_exit(&xdf_hvm_list_lock); 2167 2168 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); 2169 kmem_free(i, sizeof (*i)); 2170 } 2171 2172 static void 2173 xdf_hvm_init(void) 2174 { 2175 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), 2176 offsetof(xdf_hvm_entry_t, xdf_he_list)); 2177 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); 2178 } 2179 2180 static void 2181 xdf_hvm_fini(void) 2182 { 2183 ASSERT(list_head(&xdf_hvm_list) == NULL); 2184 list_destroy(&xdf_hvm_list); 2185 mutex_destroy(&xdf_hvm_list_lock); 2186 } 2187 2188 boolean_t 2189 xdf_hvm_connect(dev_info_t *dip) 2190 { 2191 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2192 char *oename, *str; 2193 int rv; 2194 2195 mutex_enter(&vdp->xdf_cb_lk); 2196 2197 /* 2198 * Before try to establish a connection we need to wait for the 2199 * backend hotplug scripts to have run. Once they are run the 2200 * "<oename>/hotplug-status" property will be set to "connected". 2201 */ 2202 for (;;) { 2203 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2204 2205 /* 2206 * Get the xenbus path to the backend device. Note that 2207 * we can't cache this path (and we look it up on each pass 2208 * through this loop) because it could change during 2209 * suspend, resume, and migration operations. 2210 */ 2211 if ((oename = xvdi_get_oename(dip)) == NULL) { 2212 mutex_exit(&vdp->xdf_cb_lk); 2213 return (B_FALSE); 2214 } 2215 2216 str = NULL; 2217 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && 2218 (strcmp(str, XBV_HP_STATUS_CONN) == 0)) 2219 break; 2220 2221 if (str != NULL) 2222 strfree(str); 2223 2224 /* wait for an update to "<oename>/hotplug-status" */ 2225 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { 2226 /* we got interrupted by a signal */ 2227 mutex_exit(&vdp->xdf_cb_lk); 2228 return (B_FALSE); 2229 } 2230 } 2231 2232 /* Good news. The backend hotplug scripts have been run. */ 2233 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); 2234 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); 2235 strfree(str); 2236 2237 /* 2238 * If we're emulating a cd device and if the backend doesn't support 2239 * media request opreations, then we're not going to bother trying 2240 * to establish a connection for a couple reasons. First off, media 2241 * requests support is required to support operations like eject and 2242 * media locking. Second, other backend platforms like Linux don't 2243 * support hvm pv cdrom access. They don't even have a backend pv 2244 * driver for cdrom device nodes, so we don't want to block forever 2245 * waiting for a connection to a backend driver that doesn't exist. 2246 */ 2247 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { 2248 mutex_exit(&vdp->xdf_cb_lk); 2249 return (B_FALSE); 2250 } 2251 2252 mutex_enter(&vdp->xdf_dev_lk); 2253 rv = xdf_connect_locked(vdp, B_TRUE); 2254 mutex_exit(&vdp->xdf_dev_lk); 2255 mutex_exit(&vdp->xdf_cb_lk); 2256 2257 return ((rv == XD_READY) ? 
B_TRUE : B_FALSE); 2258 } 2259 2260 int 2261 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2262 { 2263 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2264 2265 /* sanity check the requested physical geometry */ 2266 mutex_enter(&vdp->xdf_dev_lk); 2267 if ((geomp->g_secsize != XB_BSIZE) || 2268 (geomp->g_capacity == 0)) { 2269 mutex_exit(&vdp->xdf_dev_lk); 2270 return (EINVAL); 2271 } 2272 2273 /* 2274 * If we've already connected to the backend device then make sure 2275 * we're not defining a physical geometry larger than our backend 2276 * device. 2277 */ 2278 if ((vdp->xdf_xdev_nblocks != 0) && 2279 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { 2280 mutex_exit(&vdp->xdf_dev_lk); 2281 return (EINVAL); 2282 } 2283 2284 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); 2285 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; 2286 vdp->xdf_pgeom.g_acyl = geomp->g_acyl; 2287 vdp->xdf_pgeom.g_nhead = geomp->g_nhead; 2288 vdp->xdf_pgeom.g_nsect = geomp->g_nsect; 2289 vdp->xdf_pgeom.g_secsize = geomp->g_secsize; 2290 vdp->xdf_pgeom.g_capacity = geomp->g_capacity; 2291 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; 2292 vdp->xdf_pgeom.g_rpm = geomp->g_rpm; 2293 2294 vdp->xdf_pgeom_fixed = B_TRUE; 2295 mutex_exit(&vdp->xdf_dev_lk); 2296 2297 /* force a re-validation */ 2298 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 2299 2300 return (0); 2301 } 2302 2303 boolean_t 2304 xdf_is_cd(dev_info_t *dip) 2305 { 2306 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2307 boolean_t rv; 2308 2309 mutex_enter(&vdp->xdf_cb_lk); 2310 rv = XD_IS_CD(vdp); 2311 mutex_exit(&vdp->xdf_cb_lk); 2312 return (rv); 2313 } 2314 2315 boolean_t 2316 xdf_is_rm(dev_info_t *dip) 2317 { 2318 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2319 boolean_t rv; 2320 2321 mutex_enter(&vdp->xdf_cb_lk); 2322 rv = XD_IS_RM(vdp); 2323 mutex_exit(&vdp->xdf_cb_lk); 2324 return (rv); 2325 } 2326 2327 boolean_t 2328 xdf_media_req_supported(dev_info_t *dip) 2329 { 2330 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 2331 boolean_t rv; 2332 2333 mutex_enter(&vdp->xdf_cb_lk); 2334 rv = vdp->xdf_media_req_supported; 2335 mutex_exit(&vdp->xdf_cb_lk); 2336 return (rv); 2337 } 2338 2339 #endif /* XPV_HVM_DRIVER */ 2340 2341 static int 2342 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) 2343 { 2344 xdf_t *vdp; 2345 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2346 2347 if (vdp == NULL) 2348 return (ENXIO); 2349 2350 mutex_enter(&vdp->xdf_dev_lk); 2351 *capp = vdp->xdf_pgeom.g_capacity; 2352 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); 2353 mutex_exit(&vdp->xdf_dev_lk); 2354 return (0); 2355 } 2356 2357 static int 2358 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2359 { 2360 xdf_t *vdp; 2361 2362 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 2363 return (ENXIO); 2364 *geomp = vdp->xdf_pgeom; 2365 return (0); 2366 } 2367 2368 /* 2369 * No real HBA, no geometry available from it 2370 */ 2371 /*ARGSUSED*/ 2372 static int 2373 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) 2374 { 2375 return (EINVAL); 2376 } 2377 2378 static int 2379 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) 2380 { 2381 xdf_t *vdp; 2382 2383 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) 2384 return (ENXIO); 2385 2386 if (XD_IS_RO(vdp)) 2387 tgattributep->media_is_writable = 0; 2388 else 2389 tgattributep->media_is_writable = 1; 2390 tgattributep->media_is_rotational = 0; 2391 return (0); 2392 } 2393 2394 /* ARGSUSED3 */ 2395 int 2396 xdf_lb_getinfo(dev_info_t *dip, int 
cmd, void *arg, void *tg_cookie) 2397 { 2398 int instance; 2399 xdf_t *vdp; 2400 2401 instance = ddi_get_instance(dip); 2402 2403 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 2404 return (ENXIO); 2405 2406 switch (cmd) { 2407 case TG_GETPHYGEOM: 2408 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); 2409 case TG_GETVIRTGEOM: 2410 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg)); 2411 case TG_GETCAPACITY: 2412 return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); 2413 case TG_GETBLOCKSIZE: 2414 mutex_enter(&vdp->xdf_cb_lk); 2415 *(uint32_t *)arg = vdp->xdf_xdev_secsize; 2416 mutex_exit(&vdp->xdf_cb_lk); 2417 return (0); 2418 case TG_GETATTR: 2419 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); 2420 default: 2421 return (ENOTTY); 2422 } 2423 } 2424 2425 /* ARGSUSED5 */ 2426 int 2427 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 2428 diskaddr_t start, size_t reqlen, void *tg_cookie) 2429 { 2430 xdf_t *vdp; 2431 struct buf *bp; 2432 int err = 0; 2433 2434 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 2435 2436 /* We don't allow IO from the oe_change callback thread */ 2437 ASSERT(curthread != vdp->xdf_oe_change_thread); 2438 2439 /* 2440 * Having secsize of 0 means that device isn't connected yet. 2441 * FIXME This happens for CD devices, and there's nothing we 2442 * can do about it at the moment. 2443 */ 2444 if (vdp->xdf_xdev_secsize == 0) 2445 return (EIO); 2446 2447 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) 2448 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) 2449 return (EINVAL); 2450 2451 bp = getrbuf(KM_SLEEP); 2452 if (cmd == TG_READ) 2453 bp->b_flags = B_BUSY | B_READ; 2454 else 2455 bp->b_flags = B_BUSY | B_WRITE; 2456 2457 bp->b_un.b_addr = bufp; 2458 bp->b_bcount = reqlen; 2459 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); 2460 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ 2461 2462 mutex_enter(&vdp->xdf_dev_lk); 2463 xdf_bp_push(vdp, bp); 2464 mutex_exit(&vdp->xdf_dev_lk); 2465 xdf_io_start(vdp); 2466 if (curthread == vdp->xdf_ready_tq_thread) 2467 (void) xdf_ring_drain(vdp); 2468 err = biowait(bp); 2469 ASSERT(bp->b_flags & B_DONE); 2470 freerbuf(bp); 2471 return (err); 2472 } 2473 2474 /* 2475 * Lock the current media. Set the media state to "lock". 2476 * (Media locks are only respected by the backend driver.) 2477 */ 2478 static int 2479 xdf_ioctl_mlock(xdf_t *vdp) 2480 { 2481 int rv; 2482 mutex_enter(&vdp->xdf_cb_lk); 2483 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2484 mutex_exit(&vdp->xdf_cb_lk); 2485 return (rv); 2486 } 2487 2488 /* 2489 * Release a media lock. Set the media state to "none". 2490 */ 2491 static int 2492 xdf_ioctl_munlock(xdf_t *vdp) 2493 { 2494 int rv; 2495 mutex_enter(&vdp->xdf_cb_lk); 2496 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); 2497 mutex_exit(&vdp->xdf_cb_lk); 2498 return (rv); 2499 } 2500 2501 /* 2502 * Eject the current media. Ignores any media locks. (Media locks 2503 * are only for benifit of the the backend.) 2504 */ 2505 static int 2506 xdf_ioctl_eject(xdf_t *vdp) 2507 { 2508 int rv; 2509 2510 mutex_enter(&vdp->xdf_cb_lk); 2511 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { 2512 mutex_exit(&vdp->xdf_cb_lk); 2513 return (rv); 2514 } 2515 2516 /* 2517 * We've set the media requests xenbus parameter to eject, so now 2518 * disconnect from the backend, wait for the backend to clear 2519 * the media requets xenbus paramter, and then we can reconnect 2520 * to the backend. 
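 * If the subsequent reconnect does not reach the XD_READY state, the
 * eject is reported back to the caller as EIO (see below).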
2521 */ 2522 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); 2523 mutex_enter(&vdp->xdf_dev_lk); 2524 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { 2525 mutex_exit(&vdp->xdf_dev_lk); 2526 mutex_exit(&vdp->xdf_cb_lk); 2527 return (EIO); 2528 } 2529 mutex_exit(&vdp->xdf_dev_lk); 2530 mutex_exit(&vdp->xdf_cb_lk); 2531 return (0); 2532 } 2533 2534 /* 2535 * Watch for media state changes. This can be an insertion of a device 2536 * (triggered by a 'xm block-configure' request in another domain) or 2537 * the ejection of a device (triggered by a local "eject" operation). 2538 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). 2539 */ 2540 static int 2541 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) 2542 { 2543 enum dkio_state prev_state; 2544 2545 mutex_enter(&vdp->xdf_cb_lk); 2546 prev_state = vdp->xdf_mstate; 2547 2548 if (vdp->xdf_mstate == mstate) { 2549 while (vdp->xdf_mstate == prev_state) { 2550 if (cv_wait_sig(&vdp->xdf_mstate_cv, 2551 &vdp->xdf_cb_lk) == 0) { 2552 mutex_exit(&vdp->xdf_cb_lk); 2553 return (EINTR); 2554 } 2555 } 2556 } 2557 2558 if ((prev_state != DKIO_INSERTED) && 2559 (vdp->xdf_mstate == DKIO_INSERTED)) { 2560 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); 2561 mutex_exit(&vdp->xdf_cb_lk); 2562 return (0); 2563 } 2564 2565 mutex_exit(&vdp->xdf_cb_lk); 2566 return (0); 2567 } 2568 2569 /*ARGSUSED*/ 2570 static int 2571 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2572 int *rvalp) 2573 { 2574 minor_t minor = getminor(dev); 2575 int part = XDF_PART(minor); 2576 xdf_t *vdp; 2577 int rv; 2578 2579 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || 2580 (!xdf_isopen(vdp, part))) 2581 return (ENXIO); 2582 2583 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", 2584 vdp->xdf_addr, cmd, cmd)); 2585 2586 switch (cmd) { 2587 default: 2588 return (ENOTTY); 2589 case DKIOCG_PHYGEOM: 2590 case DKIOCG_VIRTGEOM: 2591 case DKIOCGGEOM: 2592 case DKIOCSGEOM: 2593 case DKIOCGAPART: 2594 case DKIOCSAPART: 2595 case DKIOCGVTOC: 2596 case DKIOCSVTOC: 2597 case DKIOCPARTINFO: 2598 case DKIOCGEXTVTOC: 2599 case DKIOCSEXTVTOC: 2600 case DKIOCEXTPARTINFO: 2601 case DKIOCGMBOOT: 2602 case DKIOCSMBOOT: 2603 case DKIOCGETEFI: 2604 case DKIOCSETEFI: 2605 case DKIOCSETEXTPART: 2606 case DKIOCPARTITION: 2607 rv = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, 2608 rvalp, NULL); 2609 if (rv != 0) 2610 return (rv); 2611 /* 2612 * If we're labelling the disk, we have to update the geometry 2613 * in the cmlb data structures, and we also have to write a new 2614 * devid to the disk. Note that writing an EFI label currently 2615 * requires 4 ioctls, and devid setup will fail on all but the 2616 * last. 
2617 */ 2618 if (cmd == DKIOCSEXTVTOC || cmd == DKIOCSVTOC || 2619 cmd == DKIOCSETEFI) { 2620 rv = cmlb_validate(vdp->xdf_vd_lbl, 0, 0); 2621 if (rv == 0) { 2622 xdf_devid_setup(vdp); 2623 } else { 2624 cmn_err(CE_WARN, 2625 "xdf@%s, labeling failed on validate", 2626 vdp->xdf_addr); 2627 } 2628 } 2629 return (rv); 2630 case FDEJECT: 2631 case DKIOCEJECT: 2632 case CDROMEJECT: 2633 return (xdf_ioctl_eject(vdp)); 2634 case DKIOCLOCK: 2635 return (xdf_ioctl_mlock(vdp)); 2636 case DKIOCUNLOCK: 2637 return (xdf_ioctl_munlock(vdp)); 2638 case CDROMREADOFFSET: { 2639 int offset = 0; 2640 if (!XD_IS_CD(vdp)) 2641 return (ENOTTY); 2642 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) 2643 return (EFAULT); 2644 return (0); 2645 } 2646 case DKIOCGMEDIAINFO: { 2647 struct dk_minfo media_info; 2648 2649 media_info.dki_lbsize = vdp->xdf_xdev_secsize; 2650 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; 2651 if (XD_IS_CD(vdp)) 2652 media_info.dki_media_type = DK_CDROM; 2653 else 2654 media_info.dki_media_type = DK_FIXED_DISK; 2655 2656 if (ddi_copyout(&media_info, (void *)arg, 2657 sizeof (struct dk_minfo), mode)) 2658 return (EFAULT); 2659 return (0); 2660 } 2661 case DKIOCINFO: { 2662 struct dk_cinfo info; 2663 2664 /* controller information */ 2665 if (XD_IS_CD(vdp)) 2666 info.dki_ctype = DKC_CDROM; 2667 else 2668 info.dki_ctype = DKC_VBD; 2669 2670 info.dki_cnum = 0; 2671 (void) strncpy((char *)(&info.dki_cname), "xdf", 8); 2672 2673 /* unit information */ 2674 info.dki_unit = ddi_get_instance(vdp->xdf_dip); 2675 (void) strncpy((char *)(&info.dki_dname), "xdf", 8); 2676 info.dki_flags = DKI_FMTVOL; 2677 info.dki_partition = part; 2678 info.dki_maxtransfer = maxphys / DEV_BSIZE; 2679 info.dki_addr = 0; 2680 info.dki_space = 0; 2681 info.dki_prio = 0; 2682 info.dki_vec = 0; 2683 2684 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) 2685 return (EFAULT); 2686 return (0); 2687 } 2688 case DKIOCSTATE: { 2689 enum dkio_state mstate; 2690 2691 if (ddi_copyin((void *)arg, &mstate, 2692 sizeof (mstate), mode) != 0) 2693 return (EFAULT); 2694 if ((rv = xdf_dkstate(vdp, mstate)) != 0) 2695 return (rv); 2696 mstate = vdp->xdf_mstate; 2697 if (ddi_copyout(&mstate, (void *)arg, 2698 sizeof (mstate), mode) != 0) 2699 return (EFAULT); 2700 return (0); 2701 } 2702 case DKIOCREMOVABLE: { 2703 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2704 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) 2705 return (EFAULT); 2706 return (0); 2707 } 2708 case DKIOCGETWCE: { 2709 int i = BOOLEAN2VOID(XD_IS_RM(vdp)); 2710 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode)) 2711 return (EFAULT); 2712 return (0); 2713 } 2714 case DKIOCSETWCE: { 2715 int i; 2716 if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) 2717 return (EFAULT); 2718 vdp->xdf_wce = VOID2BOOLEAN(i); 2719 return (0); 2720 } 2721 case DKIOCFLUSHWRITECACHE: { 2722 struct dk_callback *dkc = (struct dk_callback *)arg; 2723 2724 if (vdp->xdf_flush_supported) { 2725 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2726 NULL, 0, 0, (void *)dev); 2727 } else if (vdp->xdf_feature_barrier && 2728 !xdf_barrier_flush_disable) { 2729 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, 2730 vdp->xdf_cache_flush_block, xdf_flush_block, 2731 vdp->xdf_xdev_secsize, (void *)dev); 2732 } else { 2733 return (ENOTTY); 2734 } 2735 if ((mode & FKIOCTL) && (dkc != NULL) && 2736 (dkc->dkc_callback != NULL)) { 2737 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 2738 /* need to return 0 after calling callback */ 2739 rv = 0; 2740 } 2741 return (rv); 2742 } 2743 } 2744 /*NOTREACHED*/ 2745 } 2746 
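/*
 * For reference, a minimal user-level sketch (not part of this driver, and
 * using a hypothetical raw device path) of how the DKIOCGMEDIAINFO and
 * DKIOCFLUSHWRITECACHE ioctls handled above might be exercised against an
 * xdf disk node.  From user context the dk_callback argument to
 * DKIOCFLUSHWRITECACHE is ignored (only FKIOCTL callers use it above), so
 * passing NULL is sufficient:
 *
 *	#include <sys/dkio.h>
 *	#include <sys/types.h>
 *	#include <stropts.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct dk_minfo mi;
 *		int fd = open("/dev/rdsk/c0d0p0", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (1);
 *		if (ioctl(fd, DKIOCGMEDIAINFO, &mi) == 0)
 *			(void) printf("lbsize %u capacity %llu\n",
 *			    mi.dki_lbsize, (u_longlong_t)mi.dki_capacity);
 *		(void) ioctl(fd, DKIOCFLUSHWRITECACHE, NULL);
 *		(void) close(fd);
 *		return (0);
 *	}
 */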
2747 static int 2748 xdf_strategy(struct buf *bp) 2749 { 2750 xdf_t *vdp; 2751 minor_t minor; 2752 diskaddr_t p_blkct, p_blkst; 2753 daddr_t blkno; 2754 ulong_t nblks; 2755 int part; 2756 2757 minor = getminor(bp->b_edev); 2758 part = XDF_PART(minor); 2759 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); 2760 2761 mutex_enter(&vdp->xdf_dev_lk); 2762 if (!xdf_isopen(vdp, part)) { 2763 mutex_exit(&vdp->xdf_dev_lk); 2764 xdf_io_err(bp, ENXIO, 0); 2765 return (0); 2766 } 2767 2768 /* We don't allow IO from the oe_change callback thread */ 2769 ASSERT(curthread != vdp->xdf_oe_change_thread); 2770 2771 /* Check for writes to a read only device */ 2772 if (!IS_READ(bp) && XD_IS_RO(vdp)) { 2773 mutex_exit(&vdp->xdf_dev_lk); 2774 xdf_io_err(bp, EROFS, 0); 2775 return (0); 2776 } 2777 2778 /* Check if this I/O is accessing a partition or the entire disk */ 2779 if ((long)bp->b_private == XB_SLICE_NONE) { 2780 /* This I/O is using an absolute offset */ 2781 p_blkct = vdp->xdf_xdev_nblocks; 2782 p_blkst = 0; 2783 } else { 2784 /* This I/O is using a partition relative offset */ 2785 mutex_exit(&vdp->xdf_dev_lk); 2786 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 2787 &p_blkst, NULL, NULL, NULL)) { 2788 xdf_io_err(bp, ENXIO, 0); 2789 return (0); 2790 } 2791 mutex_enter(&vdp->xdf_dev_lk); 2792 } 2793 2794 /* 2795 * Adjust the real blkno and bcount according to the underline 2796 * physical sector size. 2797 */ 2798 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); 2799 2800 /* check for a starting block beyond the disk or partition limit */ 2801 if (blkno > p_blkct) { 2802 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, 2803 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); 2804 mutex_exit(&vdp->xdf_dev_lk); 2805 xdf_io_err(bp, EINVAL, 0); 2806 return (0); 2807 } 2808 2809 /* Legacy: don't set error flag at this case */ 2810 if (blkno == p_blkct) { 2811 mutex_exit(&vdp->xdf_dev_lk); 2812 bp->b_resid = bp->b_bcount; 2813 biodone(bp); 2814 return (0); 2815 } 2816 2817 /* sanitize the input buf */ 2818 bioerror(bp, 0); 2819 bp->b_resid = 0; 2820 bp->av_back = bp->av_forw = NULL; 2821 2822 /* Adjust for partial transfer, this will result in an error later */ 2823 if (vdp->xdf_xdev_secsize != 0 && 2824 vdp->xdf_xdev_secsize != XB_BSIZE) { 2825 nblks = bp->b_bcount / vdp->xdf_xdev_secsize; 2826 } else { 2827 nblks = bp->b_bcount >> XB_BSHIFT; 2828 } 2829 2830 if ((blkno + nblks) > p_blkct) { 2831 if (vdp->xdf_xdev_secsize != 0 && 2832 vdp->xdf_xdev_secsize != XB_BSIZE) { 2833 bp->b_resid = 2834 ((blkno + nblks) - p_blkct) * 2835 vdp->xdf_xdev_secsize; 2836 } else { 2837 bp->b_resid = 2838 ((blkno + nblks) - p_blkct) << 2839 XB_BSHIFT; 2840 } 2841 bp->b_bcount -= bp->b_resid; 2842 } 2843 2844 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", 2845 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); 2846 2847 /* Fix up the buf struct */ 2848 bp->b_flags |= B_BUSY; 2849 bp->b_private = (void *)(uintptr_t)p_blkst; 2850 2851 xdf_bp_push(vdp, bp); 2852 mutex_exit(&vdp->xdf_dev_lk); 2853 xdf_io_start(vdp); 2854 if (do_polled_io) 2855 (void) xdf_ring_drain(vdp); 2856 return (0); 2857 } 2858 2859 /*ARGSUSED*/ 2860 static int 2861 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) 2862 { 2863 xdf_t *vdp; 2864 minor_t minor; 2865 diskaddr_t p_blkcnt; 2866 int part; 2867 2868 minor = getminor(dev); 2869 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2870 return (ENXIO); 2871 2872 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", 2873 vdp->xdf_addr, 
(int64_t)uiop->uio_offset)); 2874 2875 part = XDF_PART(minor); 2876 if (!xdf_isopen(vdp, part)) 2877 return (ENXIO); 2878 2879 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2880 NULL, NULL, NULL, NULL)) 2881 return (ENXIO); 2882 2883 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2884 return (ENOSPC); 2885 2886 if (U_INVAL(uiop)) 2887 return (EINVAL); 2888 2889 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); 2890 } 2891 2892 /*ARGSUSED*/ 2893 static int 2894 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) 2895 { 2896 xdf_t *vdp; 2897 minor_t minor; 2898 diskaddr_t p_blkcnt; 2899 int part; 2900 2901 minor = getminor(dev); 2902 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2903 return (ENXIO); 2904 2905 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", 2906 vdp->xdf_addr, (int64_t)uiop->uio_offset)); 2907 2908 part = XDF_PART(minor); 2909 if (!xdf_isopen(vdp, part)) 2910 return (ENXIO); 2911 2912 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2913 NULL, NULL, NULL, NULL)) 2914 return (ENXIO); 2915 2916 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2917 return (ENOSPC); 2918 2919 if (U_INVAL(uiop)) 2920 return (EINVAL); 2921 2922 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); 2923 } 2924 2925 /*ARGSUSED*/ 2926 static int 2927 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) 2928 { 2929 xdf_t *vdp; 2930 minor_t minor; 2931 struct uio *uiop = aiop->aio_uio; 2932 diskaddr_t p_blkcnt; 2933 int part; 2934 2935 minor = getminor(dev); 2936 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2937 return (ENXIO); 2938 2939 part = XDF_PART(minor); 2940 if (!xdf_isopen(vdp, part)) 2941 return (ENXIO); 2942 2943 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2944 NULL, NULL, NULL, NULL)) 2945 return (ENXIO); 2946 2947 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2948 return (ENOSPC); 2949 2950 if (U_INVAL(uiop)) 2951 return (EINVAL); 2952 2953 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); 2954 } 2955 2956 /*ARGSUSED*/ 2957 static int 2958 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) 2959 { 2960 xdf_t *vdp; 2961 minor_t minor; 2962 struct uio *uiop = aiop->aio_uio; 2963 diskaddr_t p_blkcnt; 2964 int part; 2965 2966 minor = getminor(dev); 2967 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2968 return (ENXIO); 2969 2970 part = XDF_PART(minor); 2971 if (!xdf_isopen(vdp, part)) 2972 return (ENXIO); 2973 2974 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, 2975 NULL, NULL, NULL, NULL)) 2976 return (ENXIO); 2977 2978 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) 2979 return (ENOSPC); 2980 2981 if (U_INVAL(uiop)) 2982 return (EINVAL); 2983 2984 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); 2985 } 2986 2987 static int 2988 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 2989 { 2990 struct buf dumpbuf, *dbp = &dumpbuf; 2991 xdf_t *vdp; 2992 minor_t minor; 2993 int err = 0; 2994 int part; 2995 diskaddr_t p_blkcnt, p_blkst; 2996 2997 minor = getminor(dev); 2998 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 2999 return (ENXIO); 3000 3001 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", 3002 vdp->xdf_addr, (void *)addr, blkno, nblk)); 3003 3004 /* We don't allow IO from the oe_change callback thread */ 3005 ASSERT(curthread != vdp->xdf_oe_change_thread); 3006 3007 part = XDF_PART(minor); 3008 if (!xdf_isopen(vdp, part)) 3009 return (ENXIO); 3010 3011 if 
(cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, 3012 NULL, NULL, NULL)) 3013 return (ENXIO); 3014 3015 if ((blkno + nblk) > 3016 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { 3017 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, 3018 vdp->xdf_addr, (daddr_t)((blkno + nblk) / 3019 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); 3020 return (EINVAL); 3021 } 3022 3023 bioinit(dbp); 3024 dbp->b_flags = B_BUSY; 3025 dbp->b_un.b_addr = addr; 3026 dbp->b_bcount = nblk << DEV_BSHIFT; 3027 dbp->b_blkno = blkno; 3028 dbp->b_edev = dev; 3029 dbp->b_private = (void *)(uintptr_t)p_blkst; 3030 3031 mutex_enter(&vdp->xdf_dev_lk); 3032 xdf_bp_push(vdp, dbp); 3033 mutex_exit(&vdp->xdf_dev_lk); 3034 xdf_io_start(vdp); 3035 err = xdf_ring_drain(vdp); 3036 biofini(dbp); 3037 return (err); 3038 } 3039 3040 /*ARGSUSED*/ 3041 static int 3042 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) 3043 { 3044 minor_t minor; 3045 xdf_t *vdp; 3046 int part; 3047 ulong_t parbit; 3048 3049 minor = getminor(dev); 3050 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3051 return (ENXIO); 3052 3053 mutex_enter(&vdp->xdf_dev_lk); 3054 part = XDF_PART(minor); 3055 if (!xdf_isopen(vdp, part)) { 3056 mutex_exit(&vdp->xdf_dev_lk); 3057 return (ENXIO); 3058 } 3059 parbit = 1 << part; 3060 3061 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); 3062 if (otyp == OTYP_LYR) { 3063 ASSERT(vdp->xdf_vd_lyropen[part] > 0); 3064 if (--vdp->xdf_vd_lyropen[part] == 0) 3065 vdp->xdf_vd_open[otyp] &= ~parbit; 3066 } else { 3067 vdp->xdf_vd_open[otyp] &= ~parbit; 3068 } 3069 vdp->xdf_vd_exclopen &= ~parbit; 3070 3071 mutex_exit(&vdp->xdf_dev_lk); 3072 return (0); 3073 } 3074 3075 static int 3076 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3077 { 3078 minor_t minor; 3079 xdf_t *vdp; 3080 int part; 3081 ulong_t parbit; 3082 diskaddr_t p_blkct = 0; 3083 boolean_t firstopen; 3084 boolean_t nodelay; 3085 3086 minor = getminor(*devp); 3087 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) 3088 return (ENXIO); 3089 3090 nodelay = (flag & (FNDELAY | FNONBLOCK)); 3091 3092 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); 3093 3094 /* do cv_wait until connected or failed */ 3095 mutex_enter(&vdp->xdf_cb_lk); 3096 mutex_enter(&vdp->xdf_dev_lk); 3097 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { 3098 mutex_exit(&vdp->xdf_dev_lk); 3099 mutex_exit(&vdp->xdf_cb_lk); 3100 return (ENXIO); 3101 } 3102 mutex_exit(&vdp->xdf_cb_lk); 3103 3104 if ((flag & FWRITE) && XD_IS_RO(vdp)) { 3105 mutex_exit(&vdp->xdf_dev_lk); 3106 return (EROFS); 3107 } 3108 3109 part = XDF_PART(minor); 3110 parbit = 1 << part; 3111 if ((vdp->xdf_vd_exclopen & parbit) || 3112 ((flag & FEXCL) && xdf_isopen(vdp, part))) { 3113 mutex_exit(&vdp->xdf_dev_lk); 3114 return (EBUSY); 3115 } 3116 3117 /* are we the first one to open this node? 
*/ 3118 firstopen = !xdf_isopen(vdp, -1); 3119 3120 if (otyp == OTYP_LYR) 3121 vdp->xdf_vd_lyropen[part]++; 3122 3123 vdp->xdf_vd_open[otyp] |= parbit; 3124 3125 if (flag & FEXCL) 3126 vdp->xdf_vd_exclopen |= parbit; 3127 3128 mutex_exit(&vdp->xdf_dev_lk); 3129 3130 /* force a re-validation */ 3131 if (firstopen) 3132 cmlb_invalidate(vdp->xdf_vd_lbl, NULL); 3133 3134 /* If this is a non-blocking open then we're done */ 3135 if (nodelay) 3136 return (0); 3137 3138 /* 3139 * This is a blocking open, so we require: 3140 * - that the disk have a valid label on it 3141 * - that the size of the partition that we're opening is non-zero 3142 */ 3143 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, 3144 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { 3145 (void) xdf_close(*devp, flag, otyp, credp); 3146 return (ENXIO); 3147 } 3148 3149 return (0); 3150 } 3151 3152 /*ARGSUSED*/ 3153 static void 3154 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) 3155 { 3156 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); 3157 cv_broadcast(&vdp->xdf_hp_status_cv); 3158 } 3159 3160 static int 3161 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, 3162 char *name, caddr_t valuep, int *lengthp) 3163 { 3164 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); 3165 3166 /* 3167 * Sanity check that if a dev_t or dip were specified that they 3168 * correspond to this device driver. On debug kernels we'll 3169 * panic and on non-debug kernels we'll return failure. 3170 */ 3171 ASSERT(ddi_driver_major(dip) == xdf_major); 3172 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); 3173 if ((ddi_driver_major(dip) != xdf_major) || 3174 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) 3175 return (DDI_PROP_NOT_FOUND); 3176 3177 if (vdp == NULL) 3178 return (ddi_prop_op(dev, dip, prop_op, flags, 3179 name, valuep, lengthp)); 3180 3181 return (cmlb_prop_op(vdp->xdf_vd_lbl, 3182 dev, dip, prop_op, flags, name, valuep, lengthp, 3183 XDF_PART(getminor(dev)), NULL)); 3184 } 3185 3186 /*ARGSUSED*/ 3187 static int 3188 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) 3189 { 3190 int instance = XDF_INST(getminor((dev_t)arg)); 3191 xdf_t *vbdp; 3192 3193 switch (cmd) { 3194 case DDI_INFO_DEVT2DEVINFO: 3195 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { 3196 *rp = NULL; 3197 return (DDI_FAILURE); 3198 } 3199 *rp = vbdp->xdf_dip; 3200 return (DDI_SUCCESS); 3201 3202 case DDI_INFO_DEVT2INSTANCE: 3203 *rp = (void *)(uintptr_t)instance; 3204 return (DDI_SUCCESS); 3205 3206 default: 3207 return (DDI_FAILURE); 3208 } 3209 } 3210 3211 /*ARGSUSED*/ 3212 static int 3213 xdf_resume(dev_info_t *dip) 3214 { 3215 xdf_t *vdp; 3216 char *oename; 3217 3218 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) 3219 goto err; 3220 3221 if (xdf_debug & SUSRES_DBG) 3222 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); 3223 3224 mutex_enter(&vdp->xdf_cb_lk); 3225 3226 if (xvdi_resume(dip) != DDI_SUCCESS) { 3227 mutex_exit(&vdp->xdf_cb_lk); 3228 goto err; 3229 } 3230 3231 if (((oename = xvdi_get_oename(dip)) == NULL) || 3232 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3233 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { 3234 mutex_exit(&vdp->xdf_cb_lk); 3235 goto err; 3236 } 3237 3238 mutex_enter(&vdp->xdf_dev_lk); 3239 ASSERT(vdp->xdf_state != XD_READY); 3240 xdf_set_state(vdp, XD_UNKNOWN); 3241 mutex_exit(&vdp->xdf_dev_lk); 3242 3243 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3244 
mutex_exit(&vdp->xdf_cb_lk); 3245 goto err; 3246 } 3247 3248 mutex_exit(&vdp->xdf_cb_lk); 3249 3250 if (xdf_debug & SUSRES_DBG) 3251 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); 3252 return (DDI_SUCCESS); 3253 err: 3254 if (xdf_debug & SUSRES_DBG) 3255 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); 3256 return (DDI_FAILURE); 3257 } 3258 3259 /* 3260 * Uses the in-memory devid if one exists. 3261 * 3262 * Create a devid and write it on the first block of the last track of 3263 * the last cylinder. 3264 * Return DDI_SUCCESS or DDI_FAILURE. 3265 */ 3266 static int 3267 xdf_devid_fabricate(xdf_t *vdp) 3268 { 3269 ddi_devid_t devid = vdp->xdf_tgt_devid; /* null if no devid */ 3270 struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */ 3271 diskaddr_t blk; 3272 uint_t *ip, chksum; 3273 int i, devid_size; 3274 3275 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0) 3276 goto err; 3277 3278 if (devid == NULL && ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, 3279 NULL, &devid) != DDI_SUCCESS) 3280 goto err; 3281 3282 /* allocate a buffer */ 3283 dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP); 3284 3285 /* Fill in the revision */ 3286 dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB; 3287 dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB; 3288 3289 /* Copy in the device id */ 3290 devid_size = ddi_devid_sizeof(devid); 3291 if (devid_size > DK_DEVID_SIZE) 3292 goto err; 3293 bcopy(devid, dkdevidp->dkd_devid, devid_size); 3294 3295 /* Calculate the chksum */ 3296 chksum = 0; 3297 ip = (uint_t *)dkdevidp; 3298 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++) 3299 chksum ^= ip[i]; 3300 3301 /* Fill in the checksum */ 3302 DKD_FORMCHKSUM(chksum, dkdevidp); 3303 3304 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, dkdevidp, blk, 3305 NBPSCTR, NULL) != 0) 3306 goto err; 3307 3308 kmem_free(dkdevidp, NBPSCTR); 3309 3310 vdp->xdf_tgt_devid = devid; 3311 return (DDI_SUCCESS); 3312 3313 err: 3314 if (dkdevidp != NULL) 3315 kmem_free(dkdevidp, NBPSCTR); 3316 if (devid != NULL && vdp->xdf_tgt_devid == NULL) 3317 ddi_devid_free(devid); 3318 return (DDI_FAILURE); 3319 } 3320 3321 /* 3322 * xdf_devid_read() is a local copy of xdfs_devid_read(), modified to use xdf 3323 * functions. 3324 * 3325 * Read a devid from on the first block of the last track of 3326 * the last cylinder. Make sure what we read is a valid devid. 3327 * Return DDI_SUCCESS or DDI_FAILURE. 
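 * The on-disk format is a single NBPSCTR-sized dk_devid structure: a
 * revision (DK_DEVID_REV_MSB/LSB), the encoded devid itself, and a
 * checksum (filled in via DKD_FORMCHKSUM()) computed by XOR-ing together
 * every 32-bit word of the sector except the final one, which holds the
 * checksum.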
3328 */ 3329 static int 3330 xdf_devid_read(xdf_t *vdp) 3331 { 3332 diskaddr_t blk; 3333 struct dk_devid *dkdevidp; 3334 uint_t *ip, chksum; 3335 int i; 3336 3337 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0) 3338 return (DDI_FAILURE); 3339 3340 dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP); 3341 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, dkdevidp, blk, 3342 NBPSCTR, NULL) != 0) 3343 goto err; 3344 3345 /* Validate the revision */ 3346 if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) || 3347 (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB)) 3348 goto err; 3349 3350 /* Calculate the checksum */ 3351 chksum = 0; 3352 ip = (uint_t *)dkdevidp; 3353 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++) 3354 chksum ^= ip[i]; 3355 if (DKD_GETCHKSUM(dkdevidp) != chksum) 3356 goto err; 3357 3358 /* Validate the device id */ 3359 if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS) 3360 goto err; 3361 3362 /* keep a copy of the device id */ 3363 i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid); 3364 vdp->xdf_tgt_devid = kmem_alloc(i, KM_SLEEP); 3365 bcopy(dkdevidp->dkd_devid, vdp->xdf_tgt_devid, i); 3366 kmem_free(dkdevidp, NBPSCTR); 3367 return (DDI_SUCCESS); 3368 3369 err: 3370 kmem_free(dkdevidp, NBPSCTR); 3371 return (DDI_FAILURE); 3372 } 3373 3374 /* 3375 * xdf_devid_setup() is a modified copy of cmdk_devid_setup(). 3376 * 3377 * This function creates a devid if we don't already have one, and 3378 * registers it. If we already have one, we make sure that it can be 3379 * read from the disk, otherwise we write it to the disk ourselves. If 3380 * we didn't already have a devid, and we create one, we also need to 3381 * register it. 3382 */ 3383 void 3384 xdf_devid_setup(xdf_t *vdp) 3385 { 3386 int rc; 3387 boolean_t existed = vdp->xdf_tgt_devid != NULL; 3388 3389 /* Read devid from the disk, if present */ 3390 rc = xdf_devid_read(vdp); 3391 3392 /* Otherwise write a devid (which we create if necessary) on the disk */ 3393 if (rc != DDI_SUCCESS) 3394 rc = xdf_devid_fabricate(vdp); 3395 3396 /* If we created a devid or found it on the disk, register it */ 3397 if (rc == DDI_SUCCESS && !existed) 3398 (void) ddi_devid_register(vdp->xdf_dip, vdp->xdf_tgt_devid); 3399 } 3400 3401 static int 3402 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3403 { 3404 int n, instance = ddi_get_instance(dip); 3405 ddi_iblock_cookie_t ibc, softibc; 3406 boolean_t dev_iscd = B_FALSE; 3407 xdf_t *vdp; 3408 char *oename, *xsname, *str; 3409 clock_t timeout; 3410 int err = 0; 3411 3412 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, 3413 "xdf_debug", 0)) != 0) 3414 xdf_debug = n; 3415 3416 switch (cmd) { 3417 case DDI_RESUME: 3418 return (xdf_resume(dip)); 3419 case DDI_ATTACH: 3420 break; 3421 default: 3422 return (DDI_FAILURE); 3423 } 3424 /* DDI_ATTACH */ 3425 3426 if ((xsname = xvdi_get_xsname(dip)) == NULL || 3427 (oename = xvdi_get_oename(dip)) == NULL) 3428 return (DDI_FAILURE); 3429 3430 /* 3431 * Disable auto-detach. This is necessary so that we don't get 3432 * detached while we're disconnected from the back end. 
3433 */ 3434 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, 3435 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) 3436 return (DDI_FAILURE); 3437 3438 /* driver handles kernel-issued IOCTLs */ 3439 if (ddi_prop_create(DDI_DEV_T_NONE, dip, 3440 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) 3441 return (DDI_FAILURE); 3442 3443 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 3444 return (DDI_FAILURE); 3445 3446 if (ddi_get_soft_iblock_cookie(dip, 3447 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) 3448 return (DDI_FAILURE); 3449 3450 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { 3451 cmn_err(CE_WARN, "xdf@%s: cannot read device-type", 3452 ddi_get_name_addr(dip)); 3453 return (DDI_FAILURE); 3454 } 3455 if (strcmp(str, XBV_DEV_TYPE_CD) == 0) 3456 dev_iscd = B_TRUE; 3457 strfree(str); 3458 3459 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) 3460 return (DDI_FAILURE); 3461 3462 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); 3463 vdp = ddi_get_soft_state(xdf_ssp, instance); 3464 ddi_set_driver_private(dip, vdp); 3465 vdp->xdf_dip = dip; 3466 vdp->xdf_addr = ddi_get_name_addr(dip); 3467 vdp->xdf_suspending = B_FALSE; 3468 vdp->xdf_media_req_supported = B_FALSE; 3469 vdp->xdf_peer = INVALID_DOMID; 3470 vdp->xdf_evtchn = INVALID_EVTCHN; 3471 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), 3472 offsetof(v_req_t, v_link)); 3473 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); 3474 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); 3475 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); 3476 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3477 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3478 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); 3479 vdp->xdf_cmbl_reattach = B_TRUE; 3480 if (dev_iscd) { 3481 vdp->xdf_dinfo |= VDISK_CDROM; 3482 vdp->xdf_mstate = DKIO_EJECTED; 3483 } else { 3484 vdp->xdf_mstate = DKIO_NONE; 3485 } 3486 3487 if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", 3488 1, TASKQ_DEFAULTPRI, 0)) == NULL) 3489 goto errout0; 3490 3491 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, 3492 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) 3493 goto errout0; 3494 3495 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, 3496 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { 3497 cmn_err(CE_WARN, "xdf@%s: failed to add softintr", 3498 ddi_get_name_addr(dip)); 3499 goto errout0; 3500 } 3501 3502 /* 3503 * Initialize the physical geometry stucture. Note that currently 3504 * we don't know the size of the backend device so the number 3505 * of blocks on the device will be initialized to zero. Once 3506 * we connect to the backend device we'll update the physical 3507 * geometry to reflect the real size of the device. 3508 */ 3509 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); 3510 vdp->xdf_pgeom_fixed = B_FALSE; 3511 3512 /* 3513 * Create default device minor nodes: non-removable disk. 3514 * We will adjust minor nodes after we are connected w/ backend. 3515 * 3516 * FIXME creating device minor nodes is currently disabled for CD 3517 * devices, re-enable once the issues with xdf CD devices are fixed. 
3518 */ 3519 if (!dev_iscd) { 3520 cmlb_alloc_handle(&vdp->xdf_vd_lbl); 3521 if (xdf_cmlb_attach(vdp) != 0) { 3522 cmn_err(CE_WARN, 3523 "xdf@%s: attach failed, cmlb attach failed", 3524 ddi_get_name_addr(dip)); 3525 goto errout0; 3526 } 3527 } 3528 3529 /* We ship with cache-enabled disks */ 3530 vdp->xdf_wce = B_TRUE; 3531 3532 mutex_enter(&vdp->xdf_cb_lk); 3533 /* Watch backend XenbusState change */ 3534 if (xvdi_add_event_handler(dip, 3535 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { 3536 mutex_exit(&vdp->xdf_cb_lk); 3537 goto errout0; 3538 } 3539 3540 if (xdf_setstate_init(vdp) != DDI_SUCCESS) { 3541 cmn_err(CE_WARN, "xdf@%s: start connection failed", 3542 ddi_get_name_addr(dip)); 3543 mutex_exit(&vdp->xdf_cb_lk); 3544 goto errout1; 3545 } 3546 3547 /* Nothing else to do for CD devices */ 3548 if (dev_iscd) { 3549 mutex_exit(&vdp->xdf_cb_lk); 3550 goto done; 3551 } 3552 3553 /* 3554 * In order to do cmlb_validate, we have to wait for the disk to 3555 * acknowledge the attach, so we can query the backend for the disk 3556 * geometry (see xdf_setstate_connected). 3557 * 3558 * We only wait 30 seconds; if this is the root disk, the boot 3559 * will fail, but it would fail anyway if the device never 3560 * connected. If this is a non-boot disk, that disk will fail 3561 * to connect, but again, it would fail anyway. 3562 */ 3563 timeout = ddi_get_lbolt() + drv_usectohz(XDF_STATE_TIMEOUT); 3564 while (vdp->xdf_state != XD_CONNECTED && vdp->xdf_state != XD_READY) { 3565 if (cv_timedwait(&vdp->xdf_dev_cv, &vdp->xdf_cb_lk, 3566 timeout) < 0) { 3567 cmn_err(CE_WARN, "xdf@%s: disk failed to connect", 3568 ddi_get_name_addr(dip)); 3569 mutex_exit(&vdp->xdf_cb_lk); 3570 goto errout1; 3571 } 3572 } 3573 mutex_exit(&vdp->xdf_cb_lk); 3574 3575 /* 3576 * We call cmlb_validate so that the geometry information in 3577 * vdp->xdf_vd_lbl is correct; this fills out the number of 3578 * alternate cylinders so that we have a place to write the 3579 * devid. 3580 */ 3581 if ((err = cmlb_validate(vdp->xdf_vd_lbl, 0, NULL)) != 0) { 3582 cmn_err(CE_NOTE, 3583 "xdf@%s: cmlb_validate failed: %d", 3584 ddi_get_name_addr(dip), err); 3585 /* 3586 * We can carry on even if cmlb_validate() returns EINVAL here, 3587 * as we'll rewrite the disk label anyway. 3588 */ 3589 if (err != EINVAL) 3590 goto errout1; 3591 } 3592 3593 /* 3594 * xdf_devid_setup will only write a devid if one isn't 3595 * already present. If it fails to find or create one, we 3596 * create one in-memory so that when we label the disk later, 3597 * it will have a devid to use. This is helpful to deal with 3598 * cases where people use the devids of their disks before 3599 * labelling them; note that this does cause problems if 3600 * people rely on the devids of unlabelled disks to persist 3601 * across reboot. 3602 */ 3603 xdf_devid_setup(vdp); 3604 if (vdp->xdf_tgt_devid == NULL) { 3605 if (ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, NULL, 3606 &vdp->xdf_tgt_devid) != DDI_SUCCESS) { 3607 cmn_err(CE_WARN, 3608 "xdf@%s_ attach failed, devid_init failed", 3609 ddi_get_name_addr(dip)); 3610 goto errout1; 3611 } else { 3612 (void) ddi_devid_register(vdp->xdf_dip, 3613 vdp->xdf_tgt_devid); 3614 } 3615 } 3616 3617 done: 3618 #ifdef XPV_HVM_DRIVER 3619 xdf_hvm_add(dip); 3620 3621 /* Report our version to dom0. 
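 * (The HVMPV_XDF_VERS value is published under the "guest/xdf/version"
 * xenstore node via the xenbus_printf() call below.)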
*/ 3622 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d", 3623 HVMPV_XDF_VERS)) 3624 cmn_err(CE_WARN, "xdf: couldn't write version\n"); 3625 3626 #endif /* XPV_HVM_DRIVER */ 3627 3628 /* Create kstat for iostat(1M) */ 3629 if (xdf_kstat_create(dip) != 0) { 3630 cmn_err(CE_WARN, "xdf@%s: failed to create kstat", 3631 ddi_get_name_addr(dip)); 3632 goto errout1; 3633 } 3634 3635 /* 3636 * Don't bother with getting real device identification 3637 * strings (is it even possible?), they are unlikely to 3638 * change often (if at all). 3639 */ 3640 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_VENDOR_ID, 3641 "Xen"); 3642 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_PRODUCT_ID, 3643 dev_iscd ? "Virtual CD" : "Virtual disk"); 3644 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_REVISION_ID, 3645 "1.0"); 3646 3647 ddi_report_dev(dip); 3648 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); 3649 return (DDI_SUCCESS); 3650 3651 errout1: 3652 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); 3653 xvdi_remove_event_handler(dip, XS_OE_STATE); 3654 errout0: 3655 if (vdp->xdf_vd_lbl != NULL) { 3656 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3657 cmlb_free_handle(&vdp->xdf_vd_lbl); 3658 vdp->xdf_vd_lbl = NULL; 3659 } 3660 if (vdp->xdf_softintr_id != NULL) 3661 ddi_remove_softintr(vdp->xdf_softintr_id); 3662 xvdi_remove_xb_watch_handlers(dip); 3663 if (vdp->xdf_ready_tq != NULL) 3664 ddi_taskq_destroy(vdp->xdf_ready_tq); 3665 mutex_destroy(&vdp->xdf_cb_lk); 3666 mutex_destroy(&vdp->xdf_dev_lk); 3667 cv_destroy(&vdp->xdf_dev_cv); 3668 cv_destroy(&vdp->xdf_hp_status_cv); 3669 ddi_soft_state_free(xdf_ssp, instance); 3670 ddi_set_driver_private(dip, NULL); 3671 ddi_prop_remove_all(dip); 3672 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); 3673 return (DDI_FAILURE); 3674 } 3675 3676 static int 3677 xdf_suspend(dev_info_t *dip) 3678 { 3679 int instance = ddi_get_instance(dip); 3680 xdf_t *vdp; 3681 3682 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) 3683 return (DDI_FAILURE); 3684 3685 if (xdf_debug & SUSRES_DBG) 3686 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); 3687 3688 xvdi_suspend(dip); 3689 3690 mutex_enter(&vdp->xdf_cb_lk); 3691 mutex_enter(&vdp->xdf_dev_lk); 3692 3693 vdp->xdf_suspending = B_TRUE; 3694 xdf_ring_destroy(vdp); 3695 xdf_set_state(vdp, XD_SUSPEND); 3696 vdp->xdf_suspending = B_FALSE; 3697 3698 mutex_exit(&vdp->xdf_dev_lk); 3699 mutex_exit(&vdp->xdf_cb_lk); 3700 3701 if (xdf_debug & SUSRES_DBG) 3702 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); 3703 3704 return (DDI_SUCCESS); 3705 } 3706 3707 static int 3708 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3709 { 3710 xdf_t *vdp; 3711 int instance; 3712 3713 switch (cmd) { 3714 3715 case DDI_PM_SUSPEND: 3716 break; 3717 3718 case DDI_SUSPEND: 3719 return (xdf_suspend(dip)); 3720 3721 case DDI_DETACH: 3722 break; 3723 3724 default: 3725 return (DDI_FAILURE); 3726 } 3727 3728 instance = ddi_get_instance(dip); 3729 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); 3730 vdp = ddi_get_soft_state(xdf_ssp, instance); 3731 3732 if (vdp == NULL) 3733 return (DDI_FAILURE); 3734 3735 mutex_enter(&vdp->xdf_cb_lk); 3736 xdf_disconnect(vdp, XD_CLOSED, B_FALSE); 3737 if (vdp->xdf_state != XD_CLOSED) { 3738 mutex_exit(&vdp->xdf_cb_lk); 3739 return (DDI_FAILURE); 3740 } 3741 mutex_exit(&vdp->xdf_cb_lk); 3742 3743 ASSERT(!ISDMACBON(vdp)); 3744 3745 #ifdef XPV_HVM_DRIVER 3746 xdf_hvm_rm(dip); 3747 #endif /* XPV_HVM_DRIVER */ 3748 
3749 if (vdp->xdf_timeout_id != 0) 3750 (void) untimeout(vdp->xdf_timeout_id); 3751 3752 xvdi_remove_event_handler(dip, XS_OE_STATE); 3753 ddi_taskq_destroy(vdp->xdf_ready_tq); 3754 3755 cmlb_detach(vdp->xdf_vd_lbl, NULL); 3756 cmlb_free_handle(&vdp->xdf_vd_lbl); 3757 3758 /* we'll support backend running in domU later */ 3759 #ifdef DOMU_BACKEND 3760 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 3761 #endif 3762 3763 list_destroy(&vdp->xdf_vreq_act); 3764 ddi_prop_remove_all(dip); 3765 xdf_kstat_delete(dip); 3766 ddi_remove_softintr(vdp->xdf_softintr_id); 3767 xvdi_remove_xb_watch_handlers(dip); 3768 ddi_set_driver_private(dip, NULL); 3769 cv_destroy(&vdp->xdf_dev_cv); 3770 mutex_destroy(&vdp->xdf_cb_lk); 3771 mutex_destroy(&vdp->xdf_dev_lk); 3772 if (vdp->xdf_cache_flush_block != NULL) 3773 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize); 3774 ddi_soft_state_free(xdf_ssp, instance); 3775 return (DDI_SUCCESS); 3776 } 3777 3778 /* 3779 * Driver linkage structures. 3780 */ 3781 static struct cb_ops xdf_cbops = { 3782 xdf_open, 3783 xdf_close, 3784 xdf_strategy, 3785 nodev, 3786 xdf_dump, 3787 xdf_read, 3788 xdf_write, 3789 xdf_ioctl, 3790 nodev, 3791 nodev, 3792 nodev, 3793 nochpoll, 3794 xdf_prop_op, 3795 NULL, 3796 D_MP | D_NEW | D_64BIT, 3797 CB_REV, 3798 xdf_aread, 3799 xdf_awrite 3800 }; 3801 3802 struct dev_ops xdf_devops = { 3803 DEVO_REV, /* devo_rev */ 3804 0, /* devo_refcnt */ 3805 xdf_getinfo, /* devo_getinfo */ 3806 nulldev, /* devo_identify */ 3807 nulldev, /* devo_probe */ 3808 xdf_attach, /* devo_attach */ 3809 xdf_detach, /* devo_detach */ 3810 nodev, /* devo_reset */ 3811 &xdf_cbops, /* devo_cb_ops */ 3812 NULL, /* devo_bus_ops */ 3813 NULL, /* devo_power */ 3814 ddi_quiesce_not_supported, /* devo_quiesce */ 3815 }; 3816 3817 /* 3818 * Module linkage structures. 3819 */ 3820 static struct modldrv modldrv = { 3821 &mod_driverops, /* Type of module. This one is a driver */ 3822 "virtual block driver", /* short description */ 3823 &xdf_devops /* driver specific ops */ 3824 }; 3825 3826 static struct modlinkage xdf_modlinkage = { 3827 MODREV_1, (void *)&modldrv, NULL 3828 }; 3829 3830 /* 3831 * standard module entry points 3832 */ 3833 int 3834 _init(void) 3835 { 3836 int rc; 3837 3838 xdf_major = ddi_name_to_major("xdf"); 3839 if (xdf_major == (major_t)-1) 3840 return (EINVAL); 3841 3842 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) 3843 return (rc); 3844 3845 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", 3846 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3847 xdf_gs_cache = kmem_cache_create("xdf_gs_cache", 3848 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3849 3850 #ifdef XPV_HVM_DRIVER 3851 xdf_hvm_init(); 3852 #endif /* XPV_HVM_DRIVER */ 3853 3854 if ((rc = mod_install(&xdf_modlinkage)) != 0) { 3855 #ifdef XPV_HVM_DRIVER 3856 xdf_hvm_fini(); 3857 #endif /* XPV_HVM_DRIVER */ 3858 kmem_cache_destroy(xdf_vreq_cache); 3859 kmem_cache_destroy(xdf_gs_cache); 3860 ddi_soft_state_fini(&xdf_ssp); 3861 return (rc); 3862 } 3863 3864 return (rc); 3865 } 3866 3867 int 3868 _fini(void) 3869 { 3870 int err; 3871 if ((err = mod_remove(&xdf_modlinkage)) != 0) 3872 return (err); 3873 3874 #ifdef XPV_HVM_DRIVER 3875 xdf_hvm_fini(); 3876 #endif /* XPV_HVM_DRIVER */ 3877 3878 kmem_cache_destroy(xdf_vreq_cache); 3879 kmem_cache_destroy(xdf_gs_cache); 3880 ddi_soft_state_fini(&xdf_ssp); 3881 3882 return (0); 3883 } 3884 3885 int 3886 _info(struct modinfo *modinfop) 3887 { 3888 return (mod_info(&xdf_modlinkage, modinfop)); 3889 } 3890
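/*
 * For reference, a minimal user-level sketch (not part of this driver, and
 * using a hypothetical raw CD device path) of the DKIOCSTATE media-watch
 * loop serviced by xdf_dkstate() above: the ioctl blocks until the media
 * state differs from the state passed in, then returns the new state.
 *
 *	#include <sys/dkio.h>
 *	#include <stropts.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		enum dkio_state state = DKIO_NONE;
 *		int fd = open("/dev/rdsk/c1t0d0s2", O_RDONLY | O_NDELAY);
 *
 *		if (fd < 0)
 *			return (1);
 *		for (;;) {
 *			if (ioctl(fd, DKIOCSTATE, &state) < 0)
 *				break;
 *			(void) printf("media state is now %d\n", (int)state);
 *		}
 *		(void) close(fd);
 *		return (0);
 *	}
 */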