1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Note: This is the backend part of the split PV disk driver. This driver 29 * is not a nexus driver, nor is it a leaf driver(block/char/stream driver). 30 * Currently, it does not create any minor node. So, although, it runs in 31 * backend domain, it will not be used directly from within dom0. 32 * It simply gets block I/O requests issued by frontend from a shared page 33 * (blkif ring buffer - defined by Xen) between backend and frontend domain, 34 * generates a buf, and push it down to underlying disk target driver via 35 * ldi interface. When buf is done, this driver will generate a response 36 * and put it into ring buffer to inform frontend of the status of the I/O 37 * request issued by it. When a new virtual device entry is added in xenstore, 38 * there will be an watch event sent from Xen to xvdi framework, who will, 39 * in turn, create the devinfo node and try to attach this driver 40 * (see xvdi_create_dev). 
When frontend peer changes its state to 41 * XenbusStateClose, an event will also be sent from Xen to xvdi framework, 42 * who will detach and remove this devinfo node (see i_xvdi_oestate_handler). 43 * I/O requests get from ring buffer and event coming from xenstore cannot be 44 * trusted. We verify them in xdb_get_buf() and xdb_check_state_transition(). 45 * 46 * Virtual device configuration is read/written from/to the database via 47 * xenbus_* interfaces. Driver also use xvdi_* to interact with hypervisor. 48 * There is an on-going effort to make xvdi_* cover all xenbus_*. 49 */ 50 51 #include <sys/types.h> 52 #include <sys/conf.h> 53 #include <sys/ddi.h> 54 #include <sys/dditypes.h> 55 #include <sys/sunddi.h> 56 #include <sys/list.h> 57 #include <sys/dkio.h> 58 #include <sys/cmlb.h> 59 #include <sys/vtoc.h> 60 #include <sys/modctl.h> 61 #include <sys/bootconf.h> 62 #include <sys/promif.h> 63 #include <sys/sysmacros.h> 64 #include <public/io/xenbus.h> 65 #include <xen/sys/xenbus_impl.h> 66 #include <xen/sys/xendev.h> 67 #include <sys/gnttab.h> 68 #include <sys/scsi/generic/inquiry.h> 69 #include <vm/seg_kmem.h> 70 #include <vm/hat_i86.h> 71 #include <sys/gnttab.h> 72 #include <sys/lofi.h> 73 #include <io/xdf.h> 74 #include <xen/io/blkif_impl.h> 75 #include <io/xdb.h> 76 77 static xdb_t *xdb_statep; 78 static int xdb_debug = 0; 79 80 static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t); 81 static int xdb_get_request(xdb_t *, blkif_request_t *); 82 static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *); 83 static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *); 84 85 #ifdef DEBUG 86 /* 87 * debug aid functions 88 */ 89 90 static void 91 logva(xdb_t *vdp, uint64_t va) 92 { 93 uint64_t *page_addrs; 94 int i; 95 96 page_addrs = vdp->page_addrs; 97 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { 98 if (page_addrs[i] == va) 99 debug_enter("VA remapping found!"); 100 } 101 102 for (i = 0; i < 
XDB_MAX_IO_PAGES(vdp); i++) { 103 if (page_addrs[i] == 0) { 104 page_addrs[i] = va; 105 break; 106 } 107 } 108 ASSERT(i < XDB_MAX_IO_PAGES(vdp)); 109 } 110 111 static void 112 unlogva(xdb_t *vdp, uint64_t va) 113 { 114 uint64_t *page_addrs; 115 int i; 116 117 page_addrs = vdp->page_addrs; 118 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { 119 if (page_addrs[i] == va) { 120 page_addrs[i] = 0; 121 break; 122 } 123 } 124 ASSERT(i < XDB_MAX_IO_PAGES(vdp)); 125 } 126 127 static void 128 xdb_dump_request_oe(blkif_request_t *req) 129 { 130 int i; 131 132 /* 133 * Exploit the public interface definitions for BLKIF_OP_READ 134 * etc.. 135 */ 136 char *op_name[] = { "read", "write", "barrier", "flush" }; 137 138 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation])); 139 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d", 140 req->nr_segments)); 141 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle)); 142 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu", 143 (unsigned long long)req->id)); 144 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu", 145 (unsigned long long)req->sector_number)); 146 for (i = 0; i < req->nr_segments; i++) { 147 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d," 148 "last sec=%d", req->seg[i].gref, req->seg[i].first_sect, 149 req->seg[i].last_sect)); 150 } 151 } 152 #endif /* DEBUG */ 153 154 /* 155 * Statistics. 156 */ 157 static char *xdb_stats[] = { 158 "rd_reqs", 159 "wr_reqs", 160 "br_reqs", 161 "fl_reqs", 162 "oo_reqs" 163 }; 164 165 static int 166 xdb_kstat_update(kstat_t *ksp, int flag) 167 { 168 xdb_t *vdp; 169 kstat_named_t *knp; 170 171 if (flag != KSTAT_READ) 172 return (EACCES); 173 174 vdp = ksp->ks_private; 175 knp = ksp->ks_data; 176 177 /* 178 * Assignment order should match that of the names in 179 * xdb_stats. 
180 */ 181 (knp++)->value.ui64 = vdp->xs_stat_req_reads; 182 (knp++)->value.ui64 = vdp->xs_stat_req_writes; 183 (knp++)->value.ui64 = vdp->xs_stat_req_barriers; 184 (knp++)->value.ui64 = vdp->xs_stat_req_flushes; 185 (knp++)->value.ui64 = 0; /* oo_req */ 186 187 return (0); 188 } 189 190 static boolean_t 191 xdb_kstat_init(xdb_t *vdp) 192 { 193 int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]); 194 char **cp = xdb_stats; 195 kstat_named_t *knp; 196 197 if ((vdp->xs_kstats = kstat_create("xdb", 198 ddi_get_instance(vdp->xs_dip), 199 "req_statistics", "block", KSTAT_TYPE_NAMED, 200 nstat, 0)) == NULL) 201 return (B_FALSE); 202 203 vdp->xs_kstats->ks_private = vdp; 204 vdp->xs_kstats->ks_update = xdb_kstat_update; 205 206 knp = vdp->xs_kstats->ks_data; 207 while (nstat > 0) { 208 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 209 knp++; 210 cp++; 211 nstat--; 212 } 213 214 kstat_install(vdp->xs_kstats); 215 216 return (B_TRUE); 217 } 218 219 static int xdb_biodone(buf_t *); 220 221 static buf_t * 222 xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq) 223 { 224 buf_t *bp; 225 uint8_t segs, curseg; 226 int sectors; 227 int i, err; 228 gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 229 ddi_acc_handle_t acchdl; 230 231 acchdl = vdp->xs_ring_hdl; 232 bp = XDB_XREQ2BP(xreq); 233 curseg = xreq->xr_curseg; 234 /* init a new xdb request */ 235 if (req != NULL) { 236 ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); 237 boolean_t pagemapok = B_TRUE; 238 uint8_t op = ddi_get8(acchdl, &req->operation); 239 240 xreq->xr_vdp = vdp; 241 xreq->xr_op = op; 242 xreq->xr_id = ddi_get64(acchdl, &req->id); 243 segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments); 244 if (segs == 0) { 245 if (op != BLKIF_OP_FLUSH_DISKCACHE) 246 cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE" 247 " is seen from domain %d with zero " 248 "length data buffer!", vdp->xs_peer); 249 bioinit(bp); 250 bp->b_bcount = 0; 251 bp->b_lblkno = 0; 252 bp->b_un.b_addr = NULL; 253 
return (bp); 254 } else if (op == BLKIF_OP_FLUSH_DISKCACHE) { 255 cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE" 256 " is seen from domain %d with non-zero " 257 "length data buffer!", vdp->xs_peer); 258 } 259 260 /* 261 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST 262 * according to the definition of blk interface by Xen 263 * we do sanity check here 264 */ 265 if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST) 266 segs = xreq->xr_buf_pages = 267 BLKIF_MAX_SEGMENTS_PER_REQUEST; 268 269 for (i = 0; i < segs; i++) { 270 uint8_t fs, ls; 271 272 mapops[i].host_addr = 273 (uint64_t)(uintptr_t)XDB_IOPAGE_VA( 274 vdp->xs_iopage_va, xreq->xr_idx, i); 275 mapops[i].dom = vdp->xs_peer; 276 mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref); 277 mapops[i].flags = GNTMAP_host_map; 278 if (op != BLKIF_OP_READ) 279 mapops[i].flags |= GNTMAP_readonly; 280 281 fs = ddi_get8(acchdl, &req->seg[i].first_sect); 282 ls = ddi_get8(acchdl, &req->seg[i].last_sect); 283 284 /* 285 * first_sect should be no bigger than last_sect and 286 * both of them should be no bigger than 287 * (PAGESIZE / XB_BSIZE - 1) according to definition 288 * of blk interface by Xen, so sanity check again 289 */ 290 if (fs > (PAGESIZE / XB_BSIZE - 1)) 291 fs = PAGESIZE / XB_BSIZE - 1; 292 if (ls > (PAGESIZE / XB_BSIZE - 1)) 293 ls = PAGESIZE / XB_BSIZE - 1; 294 if (fs > ls) 295 fs = ls; 296 297 xreq->xr_segs[i].fs = fs; 298 xreq->xr_segs[i].ls = ls; 299 } 300 301 /* map in io pages */ 302 err = xen_map_gref(GNTTABOP_map_grant_ref, mapops, i, B_FALSE); 303 if (err != 0) 304 return (NULL); 305 for (i = 0; i < segs; i++) { 306 /* 307 * Although HYPERVISOR_grant_table_op() returned no 308 * error, mapping of each single page can fail. 
So, 309 * we have to do the check here and handle the error 310 * if needed 311 */ 312 if (mapops[i].status != GNTST_okay) { 313 int j; 314 for (j = 0; j < i; j++) { 315 #ifdef DEBUG 316 unlogva(vdp, mapops[j].host_addr); 317 #endif 318 xen_release_pfn( 319 xreq->xr_plist[j].p_pagenum); 320 } 321 pagemapok = B_FALSE; 322 break; 323 } 324 /* record page mapping handle for unmapping later */ 325 xreq->xr_page_hdls[i] = mapops[i].handle; 326 #ifdef DEBUG 327 logva(vdp, mapops[i].host_addr); 328 #endif 329 /* 330 * Pass the MFNs down using the shadow list (xr_pplist) 331 * 332 * This is pretty ugly since we have implict knowledge 333 * of how the rootnex binds buffers. 334 * The GNTTABOP_map_grant_ref op makes us do some ugly 335 * stuff since we're not allowed to touch these PTEs 336 * from the VM. 337 * 338 * Obviously, these aren't real page_t's. The rootnex 339 * only needs p_pagenum. 340 * Also, don't use btop() here or 32 bit PAE breaks. 341 */ 342 xreq->xr_pplist[i] = &xreq->xr_plist[i]; 343 xreq->xr_plist[i].p_pagenum = 344 xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT); 345 } 346 347 /* 348 * not all pages mapped in successfully, unmap those mapped-in 349 * page and return failure 350 */ 351 if (!pagemapok) { 352 gnttab_unmap_grant_ref_t unmapop; 353 354 for (i = 0; i < segs; i++) { 355 if (mapops[i].status != GNTST_okay) 356 continue; 357 unmapop.host_addr = 358 (uint64_t)(uintptr_t)XDB_IOPAGE_VA( 359 vdp->xs_iopage_va, xreq->xr_idx, i); 360 unmapop.dev_bus_addr = NULL; 361 unmapop.handle = mapops[i].handle; 362 (void) HYPERVISOR_grant_table_op( 363 GNTTABOP_unmap_grant_ref, &unmapop, 1); 364 } 365 366 return (NULL); 367 } 368 bioinit(bp); 369 bp->b_lblkno = ddi_get64(acchdl, &req->sector_number); 370 bp->b_flags = B_BUSY | B_SHADOW | B_PHYS; 371 bp->b_flags |= (ddi_get8(acchdl, &req->operation) == 372 BLKIF_OP_READ) ? 
B_READ : (B_WRITE | B_ASYNC); 373 } else { 374 uint64_t blkst; 375 int isread; 376 377 /* reuse this buf */ 378 blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE; 379 isread = bp->b_flags & B_READ; 380 bioreset(bp); 381 bp->b_lblkno = blkst; 382 bp->b_flags = B_BUSY | B_SHADOW | B_PHYS; 383 bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC); 384 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!", 385 xreq->xr_idx)); 386 } 387 388 /* form a buf */ 389 bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx, 390 curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE; 391 bp->b_shadow = &xreq->xr_pplist[curseg]; 392 bp->b_iodone = xdb_biodone; 393 sectors = 0; 394 for (i = curseg; i < xreq->xr_buf_pages; i++) { 395 /* 396 * The xreq->xr_segs[i].fs of the first seg can be non-zero 397 * otherwise, we'll break it into multiple bufs 398 */ 399 if ((i != curseg) && (xreq->xr_segs[i].fs != 0)) { 400 break; 401 } 402 sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1); 403 } 404 xreq->xr_curseg = i; 405 bp->b_bcount = sectors * DEV_BSIZE; 406 bp->b_bufsize = bp->b_bcount; 407 408 return (bp); 409 } 410 411 static xdb_request_t * 412 xdb_get_req(xdb_t *vdp) 413 { 414 xdb_request_t *req; 415 int idx; 416 417 ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); 418 ASSERT(vdp->xs_free_req != -1); 419 req = &vdp->xs_req[vdp->xs_free_req]; 420 vdp->xs_free_req = req->xr_next; 421 idx = req->xr_idx; 422 bzero(req, sizeof (xdb_request_t)); 423 req->xr_idx = idx; 424 return (req); 425 } 426 427 static void 428 xdb_free_req(xdb_request_t *req) 429 { 430 xdb_t *vdp = req->xr_vdp; 431 432 ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); 433 req->xr_next = vdp->xs_free_req; 434 vdp->xs_free_req = req->xr_idx; 435 } 436 437 static void 438 xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok) 439 { 440 ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; 441 442 if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id), 443 ddi_get8(acchdl, &req->operation), ok)) 444 xvdi_notify_oe(vdp->xs_dip); 445 } 
446 447 static void 448 xdb_init_ioreqs(xdb_t *vdp) 449 { 450 int i; 451 452 ASSERT(vdp->xs_nentry); 453 454 if (vdp->xs_req == NULL) 455 vdp->xs_req = kmem_alloc(vdp->xs_nentry * 456 sizeof (xdb_request_t), KM_SLEEP); 457 #ifdef DEBUG 458 if (vdp->page_addrs == NULL) 459 vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) * 460 sizeof (uint64_t), KM_SLEEP); 461 #endif 462 for (i = 0; i < vdp->xs_nentry; i++) { 463 vdp->xs_req[i].xr_idx = i; 464 vdp->xs_req[i].xr_next = i + 1; 465 } 466 vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1; 467 vdp->xs_free_req = 0; 468 469 /* alloc va in host dom for io page mapping */ 470 vdp->xs_iopage_va = vmem_xalloc(heap_arena, 471 XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0, 472 VM_SLEEP); 473 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) 474 hat_prepare_mapping(kas.a_hat, 475 vdp->xs_iopage_va + i * PAGESIZE, NULL); 476 } 477 478 static void 479 xdb_uninit_ioreqs(xdb_t *vdp) 480 { 481 int i; 482 483 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) 484 hat_release_mapping(kas.a_hat, 485 vdp->xs_iopage_va + i * PAGESIZE); 486 vmem_xfree(heap_arena, vdp->xs_iopage_va, 487 XDB_MAX_IO_PAGES(vdp) * PAGESIZE); 488 if (vdp->xs_req != NULL) { 489 kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t)); 490 vdp->xs_req = NULL; 491 } 492 #ifdef DEBUG 493 if (vdp->page_addrs != NULL) { 494 kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) * 495 sizeof (uint64_t)); 496 vdp->page_addrs = NULL; 497 } 498 #endif 499 } 500 501 static uint_t 502 xdb_intr(caddr_t arg) 503 { 504 blkif_request_t req; 505 blkif_request_t *reqp = &req; 506 xdb_request_t *xreq; 507 buf_t *bp; 508 uint8_t op; 509 xdb_t *vdp = (xdb_t *)arg; 510 int ret = DDI_INTR_UNCLAIMED; 511 dev_info_t *dip = vdp->xs_dip; 512 513 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 514 "xdb@%s: I/O request received from dom %d", 515 ddi_get_name_addr(dip), vdp->xs_peer)); 516 517 mutex_enter(&vdp->xs_iomutex); 518 519 /* shouldn't touch ring buffer if not in connected state */ 520 if 
(vdp->xs_if_status != XDB_CONNECTED) { 521 mutex_exit(&vdp->xs_iomutex); 522 return (DDI_INTR_UNCLAIMED); 523 } 524 525 /* 526 * We'll loop till there is no more request in the ring 527 * We won't stuck in this loop for ever since the size of ring buffer 528 * is limited, and frontend will stop pushing requests into it when 529 * the ring buffer is full 530 */ 531 532 /* req_event will be increased in xvdi_ring_get_request() */ 533 while (xdb_get_request(vdp, reqp)) { 534 ret = DDI_INTR_CLAIMED; 535 536 op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation); 537 if (op == BLKIF_OP_READ || 538 op == BLKIF_OP_WRITE || 539 op == BLKIF_OP_WRITE_BARRIER || 540 op == BLKIF_OP_FLUSH_DISKCACHE) { 541 #ifdef DEBUG 542 xdb_dump_request_oe(reqp); 543 #endif 544 xreq = xdb_get_req(vdp); 545 ASSERT(xreq); 546 switch (op) { 547 case BLKIF_OP_READ: 548 vdp->xs_stat_req_reads++; 549 break; 550 case BLKIF_OP_WRITE_BARRIER: 551 vdp->xs_stat_req_barriers++; 552 /* FALLTHRU */ 553 case BLKIF_OP_WRITE: 554 vdp->xs_stat_req_writes++; 555 break; 556 case BLKIF_OP_FLUSH_DISKCACHE: 557 vdp->xs_stat_req_flushes++; 558 break; 559 } 560 561 xreq->xr_curseg = 0; /* start from first segment */ 562 bp = xdb_get_buf(vdp, reqp, xreq); 563 if (bp == NULL) { 564 /* failed to form a buf */ 565 xdb_free_req(xreq); 566 xdb_response(vdp, reqp, B_FALSE); 567 continue; 568 } 569 bp->av_forw = NULL; 570 571 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 572 " buf %p, blkno %lld, size %lu, addr %p", 573 (void *)bp, (longlong_t)bp->b_blkno, 574 (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr)); 575 576 /* send bp to underlying blk driver */ 577 if (vdp->xs_f_iobuf == NULL) { 578 vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp; 579 } else { 580 vdp->xs_l_iobuf->av_forw = bp; 581 vdp->xs_l_iobuf = bp; 582 } 583 } else { 584 xdb_response(vdp, reqp, B_FALSE); 585 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: " 586 "Unsupported cmd received from dom %d", 587 ddi_get_name_addr(dip), vdp->xs_peer)); 588 } 589 } 590 /* notify our taskq to push 
buf to underlying blk driver */ 591 if (ret == DDI_INTR_CLAIMED) 592 cv_broadcast(&vdp->xs_iocv); 593 594 mutex_exit(&vdp->xs_iomutex); 595 596 return (ret); 597 } 598 599 static int 600 xdb_biodone(buf_t *bp) 601 { 602 int i, err, bioerr; 603 uint8_t segs; 604 gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 605 xdb_request_t *xreq = XDB_BP2XREQ(bp); 606 xdb_t *vdp = xreq->xr_vdp; 607 buf_t *nbp; 608 609 bioerr = geterror(bp); 610 if (bioerr) 611 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d", 612 ddi_get_name_addr(vdp->xs_dip), bioerr)); 613 614 /* check if we are done w/ this I/O request */ 615 if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) { 616 nbp = xdb_get_buf(vdp, NULL, xreq); 617 if (nbp) { 618 err = ldi_strategy(vdp->xs_ldi_hdl, nbp); 619 if (err == 0) { 620 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 621 "sent buf to backend ok")); 622 return (DDI_SUCCESS); 623 } 624 bioerr = EIO; 625 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: " 626 "sent buf to backend dev failed, err=%d", 627 ddi_get_name_addr(vdp->xs_dip), err)); 628 } else { 629 bioerr = EIO; 630 } 631 } 632 633 /* unmap io pages */ 634 segs = xreq->xr_buf_pages; 635 /* 636 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST 637 * according to the definition of blk interface by Xen 638 */ 639 ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 640 for (i = 0; i < segs; i++) { 641 unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA( 642 vdp->xs_iopage_va, xreq->xr_idx, i); 643 #ifdef DEBUG 644 mutex_enter(&vdp->xs_iomutex); 645 unlogva(vdp, unmapops[i].host_addr); 646 mutex_exit(&vdp->xs_iomutex); 647 #endif 648 unmapops[i].dev_bus_addr = NULL; 649 unmapops[i].handle = xreq->xr_page_hdls[i]; 650 } 651 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 652 unmapops, segs); 653 ASSERT(!err); 654 655 /* 656 * If we have reached a barrier write or a cache flush , then we must 657 * flush all our I/Os. 
658 */ 659 if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER || 660 xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) { 661 /* 662 * XXX At this point the write did succeed, so I don't 663 * believe we should report an error because the flush 664 * failed. However, this is a debatable point, so 665 * maybe we need to think more carefully about this. 666 * For now, just cast to void. 667 */ 668 (void) ldi_ioctl(vdp->xs_ldi_hdl, 669 DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL); 670 } 671 672 mutex_enter(&vdp->xs_iomutex); 673 674 /* send response back to frontend */ 675 if (vdp->xs_if_status == XDB_CONNECTED) { 676 if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr)) 677 xvdi_notify_oe(vdp->xs_dip); 678 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 679 "sent resp back to frontend, id=%llu", 680 (unsigned long long)xreq->xr_id)); 681 } 682 /* free io resources */ 683 biofini(bp); 684 xdb_free_req(xreq); 685 686 vdp->xs_ionum--; 687 if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) { 688 /* we're closing, someone is waiting for I/O clean-up */ 689 cv_signal(&vdp->xs_ionumcv); 690 } 691 692 mutex_exit(&vdp->xs_iomutex); 693 694 return (DDI_SUCCESS); 695 } 696 697 static int 698 xdb_bindto_frontend(xdb_t *vdp) 699 { 700 int err; 701 char *oename; 702 grant_ref_t gref; 703 evtchn_port_t evtchn; 704 dev_info_t *dip = vdp->xs_dip; 705 char protocol[64] = ""; 706 707 /* 708 * Gather info from frontend 709 */ 710 oename = xvdi_get_oename(dip); 711 if (oename == NULL) 712 return (DDI_FAILURE); 713 714 err = xenbus_gather(XBT_NULL, oename, 715 "ring-ref", "%lu", &gref, "event-channel", "%u", &evtchn, NULL); 716 if (err != 0) { 717 xvdi_fatal_error(dip, err, 718 "Getting ring-ref and evtchn from frontend"); 719 return (DDI_FAILURE); 720 } 721 722 vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE; 723 vdp->xs_nentry = BLKIF_RING_SIZE; 724 vdp->xs_entrysize = sizeof (union blkif_sring_entry); 725 726 err = xenbus_gather(XBT_NULL, oename, 727 "protocol", "%63s", protocol, NULL); 728 if 
(err) 729 (void) strcpy(protocol, "unspecified, assuming native"); 730 else { 731 /* 732 * We must check for NATIVE first, so that the fast path 733 * is taken for copying data from the guest to the host. 734 */ 735 if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) { 736 if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { 737 vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32; 738 vdp->xs_nentry = BLKIF_X86_32_RING_SIZE; 739 vdp->xs_entrysize = 740 sizeof (union blkif_x86_32_sring_entry); 741 } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) == 742 0) { 743 vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64; 744 vdp->xs_nentry = BLKIF_X86_64_RING_SIZE; 745 vdp->xs_entrysize = 746 sizeof (union blkif_x86_64_sring_entry); 747 } else { 748 xvdi_fatal_error(dip, err, "unknown protocol"); 749 return (DDI_FAILURE); 750 } 751 } 752 } 753 #ifdef DEBUG 754 cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ", 755 ddi_get_name_addr(dip), protocol); 756 #endif 757 758 /* 759 * map and init ring 760 * 761 * The ring parameters must match those which have been allocated 762 * in the front end. 
763 */ 764 err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize, 765 gref, &vdp->xs_ring); 766 if (err != DDI_SUCCESS) 767 return (DDI_FAILURE); 768 /* 769 * This will be removed after we use shadow I/O ring request since 770 * we don't need to access the ring itself directly, thus the access 771 * handle is not needed 772 */ 773 vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl; 774 775 /* 776 * bind event channel 777 */ 778 err = xvdi_bind_evtchn(dip, evtchn); 779 if (err != DDI_SUCCESS) { 780 xvdi_unmap_ring(vdp->xs_ring); 781 return (DDI_FAILURE); 782 } 783 784 return (DDI_SUCCESS); 785 } 786 787 static void 788 xdb_unbindfrom_frontend(xdb_t *vdp) 789 { 790 xvdi_free_evtchn(vdp->xs_dip); 791 xvdi_unmap_ring(vdp->xs_ring); 792 } 793 794 #define LOFI_CTRL_NODE "/dev/lofictl" 795 #define LOFI_DEV_NODE "/devices/pseudo/lofi@0:" 796 #define LOFI_MODE FREAD | FWRITE | FEXCL 797 798 static int 799 xdb_setup_node(xdb_t *vdp, char *path) 800 { 801 dev_info_t *dip; 802 char *xsnode, *node; 803 ldi_handle_t ldi_hdl; 804 struct lofi_ioctl *li; 805 int minor; 806 int err; 807 unsigned int len; 808 809 dip = vdp->xs_dip; 810 xsnode = xvdi_get_xsname(dip); 811 if (xsnode == NULL) 812 return (DDI_FAILURE); 813 814 err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path", 815 (void **)&node, &len); 816 if (err == ENOENT) 817 err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, 818 &len); 819 if (err != 0) { 820 xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'"); 821 return (DDI_FAILURE); 822 } 823 824 if (!XDB_IS_LOFI(vdp)) { 825 (void) strlcpy(path, node, MAXPATHLEN); 826 kmem_free(node, len); 827 return (DDI_SUCCESS); 828 } 829 830 do { 831 err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, 832 &ldi_hdl, vdp->xs_ldi_li); 833 } while (err == EBUSY); 834 if (err != 0) { 835 kmem_free(node, len); 836 return (DDI_FAILURE); 837 } 838 839 li = kmem_zalloc(sizeof (*li), KM_SLEEP); 840 (void) strlcpy(li->li_filename, node, MAXPATHLEN); 841 kmem_free(node, len); 
842 if (ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, 843 LOFI_MODE | FKIOCTL, kcred, &minor) != 0) { 844 cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s", 845 ddi_get_name_addr(dip), li->li_filename); 846 (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); 847 kmem_free(li, sizeof (*li)); 848 return (DDI_FAILURE); 849 } 850 /* 851 * return '/devices/...' instead of '/dev/lofi/...' since the 852 * former is available immediately after calling ldi_ioctl 853 */ 854 (void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor); 855 (void) xenbus_printf(XBT_NULL, xsnode, "node", "%s", path); 856 (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); 857 kmem_free(li, sizeof (*li)); 858 return (DDI_SUCCESS); 859 } 860 861 static void 862 xdb_teardown_node(xdb_t *vdp) 863 { 864 dev_info_t *dip; 865 char *xsnode, *node; 866 ldi_handle_t ldi_hdl; 867 struct lofi_ioctl *li; 868 int err; 869 unsigned int len; 870 871 if (!XDB_IS_LOFI(vdp)) 872 return; 873 874 dip = vdp->xs_dip; 875 xsnode = xvdi_get_xsname(dip); 876 if (xsnode == NULL) 877 return; 878 879 err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path", 880 (void **)&node, &len); 881 if (err == ENOENT) 882 err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, 883 &len); 884 if (err != 0) { 885 xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'"); 886 return; 887 } 888 889 li = kmem_zalloc(sizeof (*li), KM_SLEEP); 890 (void) strlcpy(li->li_filename, node, MAXPATHLEN); 891 kmem_free(node, len); 892 893 do { 894 err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, 895 &ldi_hdl, vdp->xs_ldi_li); 896 } while (err == EBUSY); 897 898 if (err != 0) { 899 kmem_free(li, sizeof (*li)); 900 return; 901 } 902 903 if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li, 904 LOFI_MODE | FKIOCTL, kcred, NULL) != 0) { 905 cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s", 906 ddi_get_name_addr(dip), li->li_filename); 907 } 908 909 (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); 910 kmem_free(li, sizeof (*li)); 911 } 
912 913 static int 914 xdb_open_device(xdb_t *vdp) 915 { 916 uint64_t devsize; 917 dev_info_t *dip; 918 char *xsnode; 919 char *nodepath; 920 char *mode = NULL; 921 char *type = NULL; 922 int err; 923 924 dip = vdp->xs_dip; 925 xsnode = xvdi_get_xsname(dip); 926 if (xsnode == NULL) 927 return (DDI_FAILURE); 928 929 err = xenbus_gather(XBT_NULL, xsnode, 930 "mode", NULL, &mode, "type", NULL, &type, NULL); 931 if (err != 0) { 932 if (mode) 933 kmem_free(mode, strlen(mode) + 1); 934 if (type) 935 kmem_free(type, strlen(type) + 1); 936 xvdi_fatal_error(dip, err, 937 "Getting mode and type from backend device"); 938 return (DDI_FAILURE); 939 } 940 if (strcmp(type, "file") == 0) { 941 vdp->xs_type |= XDB_DEV_LOFI; 942 } 943 kmem_free(type, strlen(type) + 1); 944 if ((strcmp(mode, "r") == NULL) || (strcmp(mode, "ro") == NULL)) { 945 vdp->xs_type |= XDB_DEV_RO; 946 } 947 kmem_free(mode, strlen(mode) + 1); 948 949 /* 950 * try to open backend device 951 */ 952 if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0) 953 return (DDI_FAILURE); 954 955 nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 956 err = xdb_setup_node(vdp, nodepath); 957 if (err != DDI_SUCCESS) { 958 xvdi_fatal_error(dip, err, 959 "Getting device path of backend device"); 960 ldi_ident_release(vdp->xs_ldi_li); 961 kmem_free(nodepath, MAXPATHLEN); 962 return (DDI_FAILURE); 963 } 964 965 if (*nodepath == '\0') { 966 /* Allow a CD-ROM device with an empty backend. */ 967 vdp->xs_sectors = 0; 968 kmem_free(nodepath, MAXPATHLEN); 969 return (DDI_SUCCESS); 970 } 971 972 if (ldi_open_by_name(nodepath, 973 FREAD | (XDB_IS_RO(vdp) ? 
0 : FWRITE), 974 kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) { 975 xdb_teardown_node(vdp); 976 ldi_ident_release(vdp->xs_ldi_li); 977 cmn_err(CE_WARN, "xdb@%s: Failed to open: %s", 978 ddi_get_name_addr(dip), nodepath); 979 kmem_free(nodepath, MAXPATHLEN); 980 return (DDI_FAILURE); 981 } 982 983 /* check if it's a CD/DVD disc */ 984 if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS, 985 "inquiry-device-type", DTYPE_DIRECT) == DTYPE_RODIRECT) 986 vdp->xs_type |= XDB_DEV_CD; 987 /* check if it's a removable disk */ 988 if (ldi_prop_exists(vdp->xs_ldi_hdl, 989 LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 990 "removable-media")) 991 vdp->xs_type |= XDB_DEV_RMB; 992 993 if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) { 994 (void) ldi_close(vdp->xs_ldi_hdl, 995 FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred); 996 xdb_teardown_node(vdp); 997 ldi_ident_release(vdp->xs_ldi_li); 998 kmem_free(nodepath, MAXPATHLEN); 999 return (DDI_FAILURE); 1000 } 1001 vdp->xs_sectors = devsize / XB_BSIZE; 1002 1003 kmem_free(nodepath, MAXPATHLEN); 1004 return (DDI_SUCCESS); 1005 } 1006 1007 static void 1008 xdb_close_device(xdb_t *vdp) 1009 { 1010 (void) ldi_close(vdp->xs_ldi_hdl, 1011 FREAD | (XDB_IS_RO(vdp) ? 
0 : FWRITE), kcred);
	xdb_teardown_node(vdp);
	ldi_ident_release(vdp->xs_ldi_li);
	/* drop the cached LDI state so a later open starts clean */
	vdp->xs_ldi_li = NULL;
	vdp->xs_ldi_hdl = NULL;
}

/*
 * Kick-off connect process
 * If xs_fe_status == XDB_FE_READY and xs_dev_status == XDB_DEV_READY
 * the xs_if_status will be changed to XDB_CONNECTED on success,
 * otherwise, xs_if_status will not be changed.
 *
 * Opens the backend device, binds to the frontend ring/event channel,
 * registers the interrupt handler, and publishes the device properties
 * (feature-barrier, info, sector-size, sectors, instance) to xenstore in
 * a single transaction before switching to XenbusStateConnected.
 * Returns DDI_SUCCESS/DDI_FAILURE; on failure everything acquired so far
 * is unwound via the errout labels.
 */
static int
xdb_start_connect(xdb_t *vdp)
{
	uint32_t dinfo;
	xenbus_transaction_t xbt;
	int err, svdst;
	char *xsnode;
	dev_info_t *dip = vdp->xs_dip;
	char *barrier;
	uint_t len;

	/*
	 * Start connect to frontend only when the backend device is ready
	 * and frontend has moved to XenbusStateInitialised, which means
	 * ready to connect
	 */
	ASSERT((vdp->xs_fe_status == XDB_FE_READY) &&
	    (vdp->xs_dev_status == XDB_DEV_READY));

	if (((xsnode = xvdi_get_xsname(dip)) == NULL) ||
	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1) ||
	    (xdb_open_device(vdp) != DDI_SUCCESS))
		return (DDI_FAILURE);

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);

	if (xdb_bindto_frontend(vdp) != DDI_SUCCESS)
		goto errout1;

	/* init i/o requests */
	xdb_init_ioreqs(vdp);

	if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
	    != DDI_SUCCESS)
		goto errout2;

	/*
	 * we can receive intr any time from now on
	 * mark that we're ready to take intr
	 */
	mutex_enter(&vdp->xs_iomutex);
	/*
	 * save it in case we need to restore when we
	 * fail to write xenstore later
	 */
	svdst = vdp->xs_if_status;
	vdp->xs_if_status = XDB_CONNECTED;
	mutex_exit(&vdp->xs_iomutex);

	/* write into xenstore the info needed by frontend */
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		xvdi_fatal_error(dip, EIO, "transaction start");
		goto errout3;
	}

	/*
	 * If feature-barrier isn't present in xenstore, add it.
	 */
	if (xenbus_read(xbt, xsnode, "feature-barrier",
	    (void **)&barrier, &len) != 0) {
		if ((err = xenbus_printf(xbt, xsnode, "feature-barrier",
		    "%d", 1)) != 0) {
			cmn_err(CE_WARN, "xdb@%s: failed to write "
			    "'feature-barrier'", ddi_get_name_addr(dip));
			xvdi_fatal_error(dip, err, "writing 'feature-barrier'");
			goto abort_trans;
		}
	} else
		/* xenbus_read() allocated the value; we only probed for it */
		kmem_free(barrier, len);

	/* advertise disk attributes to the frontend via the 'info' bitmask */
	dinfo = 0;
	if (XDB_IS_RO(vdp))
		dinfo |= VDISK_READONLY;
	if (XDB_IS_CD(vdp))
		dinfo |= VDISK_CDROM;
	if (XDB_IS_RMB(vdp))
		dinfo |= VDISK_REMOVABLE;
	if (err = xenbus_printf(xbt, xsnode, "info", "%u", dinfo)) {
		xvdi_fatal_error(dip, err, "writing 'info'");
		goto abort_trans;
	}

	/* hard-coded 512-byte sector size */
	if (err = xenbus_printf(xbt, xsnode, "sector-size", "%u", DEV_BSIZE)) {
		xvdi_fatal_error(dip, err, "writing 'sector-size'");
		goto abort_trans;
	}

	if (err = xenbus_printf(xbt, xsnode, "sectors", "%"PRIu64,
	    vdp->xs_sectors)) {
		xvdi_fatal_error(dip, err, "writing 'sectors'");
		goto abort_trans;
	}

	if (err = xenbus_printf(xbt, xsnode, "instance", "%d",
	    ddi_get_instance(dip))) {
		xvdi_fatal_error(dip, err, "writing 'instance'");
		goto abort_trans;
	}

	if ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0) {
		xvdi_fatal_error(dip, err, "writing 'state'");
		goto abort_trans;
	}

	if (err = xenbus_transaction_end(xbt, 0)) {
		if (err == EAGAIN)
			/* transaction is ended, don't need to abort it */
			goto trans_retry;
		xvdi_fatal_error(dip, err, "completing transaction");
		goto errout3;
	}

	return (DDI_SUCCESS);

abort_trans:
	(void) xenbus_transaction_end(xbt, 1);
errout3:
	/* restore the interface status saved before we claimed CONNECTED */
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_if_status = svdst;
	mutex_exit(&vdp->xs_iomutex);
	ddi_remove_intr(dip, 0, NULL);
errout2:
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
errout1:
	xdb_close_device(vdp);
	return (DDI_FAILURE);
}

/*
 * Kick-off disconnect process
 * xs_if_status will not be changed
 */
static int
xdb_start_disconnect(xdb_t *vdp)
{
	/*
	 * Kick-off disconnect process
	 */
	if (xvdi_switch_state(vdp->xs_dip, XBT_NULL, XenbusStateClosing) > 0)
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * Disconnect from frontend and close backend device
 * ifstatus will be changed to XDB_DISCONNECTED
 * Xenbus state will be changed to XenbusStateClosed
 * Caller must hold xs_cbmutex (asserted below).
 */
static void
xdb_close(dev_info_t *dip)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	mutex_enter(&vdp->xs_iomutex);

	/*
	 * If we were never fully connected there is no interrupt or
	 * ring/frontend binding to tear down: just mark disconnected,
	 * wake any waiters and report Closed.
	 */
	if (vdp->xs_if_status != XDB_CONNECTED) {
		vdp->xs_if_status = XDB_DISCONNECTED;
		cv_broadcast(&vdp->xs_iocv);
		mutex_exit(&vdp->xs_iomutex);
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		return;
	}
	vdp->xs_if_status = XDB_DISCONNECTED;
	cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	/* stop accepting I/O request from frontend */
	ddi_remove_intr(dip, 0, NULL);
	/* clear all on-going I/Os, if any */
	mutex_enter(&vdp->xs_iomutex);
	while (vdp->xs_ionum > 0)
		cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
	mutex_exit(&vdp->xs_iomutex);

	/* clean up resources and close this interface */
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
	xdb_close_device(vdp);
	vdp->xs_peer = (domid_t)-1;
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
}

/*
 * Xdb_check_state_transition will check the XenbusState change to see
 * if the change is a valid transition or not.
1216 * The new state is written by frontend domain, or by running xenstore-write 1217 * to change it manually in dom0 1218 */ 1219 static int 1220 xdb_check_state_transition(xdb_t *vdp, XenbusState oestate) 1221 { 1222 enum xdb_state status; 1223 int stcheck; 1224 #define STOK 0 /* need further process */ 1225 #define STNOP 1 /* no action need taking */ 1226 #define STBUG 2 /* unexpected state change, could be a bug */ 1227 1228 status = vdp->xs_if_status; 1229 stcheck = STOK; 1230 1231 switch (status) { 1232 case XDB_UNKNOWN: 1233 if (vdp->xs_fe_status == XDB_FE_UNKNOWN) { 1234 if ((oestate == XenbusStateUnknown) || 1235 (oestate == XenbusStateConnected)) 1236 stcheck = STBUG; 1237 else if ((oestate == XenbusStateInitialising) || 1238 (oestate == XenbusStateInitWait)) 1239 stcheck = STNOP; 1240 } else { 1241 if ((oestate == XenbusStateUnknown) || 1242 (oestate == XenbusStateInitialising) || 1243 (oestate == XenbusStateInitWait) || 1244 (oestate == XenbusStateConnected)) 1245 stcheck = STBUG; 1246 else if (oestate == XenbusStateInitialised) 1247 stcheck = STNOP; 1248 } 1249 break; 1250 case XDB_CONNECTED: 1251 if ((oestate == XenbusStateUnknown) || 1252 (oestate == XenbusStateInitialising) || 1253 (oestate == XenbusStateInitWait) || 1254 (oestate == XenbusStateInitialised)) 1255 stcheck = STBUG; 1256 else if (oestate == XenbusStateConnected) 1257 stcheck = STNOP; 1258 break; 1259 case XDB_DISCONNECTED: 1260 default: 1261 stcheck = STBUG; 1262 } 1263 1264 if (stcheck == STOK) 1265 return (DDI_SUCCESS); 1266 1267 if (stcheck == STBUG) 1268 cmn_err(CE_NOTE, "xdb@%s: unexpected otherend " 1269 "state change to %d!, when status is %d", 1270 ddi_get_name_addr(vdp->xs_dip), oestate, status); 1271 1272 return (DDI_FAILURE); 1273 } 1274 1275 static void 1276 xdb_send_buf(void *arg) 1277 { 1278 buf_t *bp; 1279 xdb_t *vdp = (xdb_t *)arg; 1280 1281 mutex_enter(&vdp->xs_iomutex); 1282 1283 while (vdp->xs_if_status != XDB_DISCONNECTED) { 1284 while ((bp = vdp->xs_f_iobuf) != 
NULL) { 1285 vdp->xs_f_iobuf = bp->av_forw; 1286 bp->av_forw = NULL; 1287 vdp->xs_ionum++; 1288 mutex_exit(&vdp->xs_iomutex); 1289 if (bp->b_bcount != 0) { 1290 int err = ldi_strategy(vdp->xs_ldi_hdl, bp); 1291 if (err != 0) { 1292 bp->b_flags |= B_ERROR; 1293 (void) xdb_biodone(bp); 1294 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, 1295 "xdb@%s: sent buf to backend dev" 1296 "failed, err=%d", 1297 ddi_get_name_addr(vdp->xs_dip), 1298 err)); 1299 } else { 1300 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 1301 "sent buf to backend ok")); 1302 } 1303 } else /* no I/O need to be done */ 1304 (void) xdb_biodone(bp); 1305 1306 mutex_enter(&vdp->xs_iomutex); 1307 } 1308 1309 if (vdp->xs_if_status != XDB_DISCONNECTED) 1310 cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex); 1311 } 1312 1313 mutex_exit(&vdp->xs_iomutex); 1314 } 1315 1316 /*ARGSUSED*/ 1317 static void 1318 xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, 1319 void *impl_data) 1320 { 1321 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1322 xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); 1323 1324 XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " 1325 "hotplug status change to %d!", ddi_get_name_addr(dip), state)); 1326 1327 mutex_enter(&vdp->xs_cbmutex); 1328 if (state == Connected) { 1329 /* Hotplug script has completed successfully */ 1330 if (vdp->xs_dev_status == XDB_DEV_UNKNOWN) { 1331 vdp->xs_dev_status = XDB_DEV_READY; 1332 if (vdp->xs_fe_status == XDB_FE_READY) 1333 /* try to connect to frontend */ 1334 if (xdb_start_connect(vdp) != DDI_SUCCESS) 1335 (void) xdb_start_disconnect(vdp); 1336 } 1337 } 1338 mutex_exit(&vdp->xs_cbmutex); 1339 } 1340 1341 /*ARGSUSED*/ 1342 static void 1343 xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, 1344 void *impl_data) 1345 { 1346 XenbusState new_state = *(XenbusState *)impl_data; 1347 xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); 1348 1349 XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " 1350 "otherend state change to %d!", 
ddi_get_name_addr(dip), new_state));

	mutex_enter(&vdp->xs_cbmutex);

	/* reject transitions that are invalid for our current status */
	if (xdb_check_state_transition(vdp, new_state) == DDI_FAILURE) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	switch (new_state) {
	case XenbusStateInitialised:
		ASSERT(vdp->xs_if_status == XDB_UNKNOWN);

		/* frontend is ready for connecting */
		vdp->xs_fe_status = XDB_FE_READY;

		if (vdp->xs_dev_status == XDB_DEV_READY)
			if (xdb_start_connect(vdp) != DDI_SUCCESS)
				(void) xdb_start_disconnect(vdp);
		break;
	case XenbusStateClosing:
		/* echo Closing back so the frontend can proceed to Closed */
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
		break;
	case XenbusStateClosed:
		/* clean up */
		xdb_close(dip);

	}

	mutex_exit(&vdp->xs_cbmutex);
}

/*
 * attach(9E) entry point.  Allocates and initializes per-instance soft
 * state, creates the single-threaded I/O taskq running xdb_send_buf(),
 * registers xenstore otherend/hotplug event handlers, kicks off the
 * hotplug script, and moves the xenbus state to InitWait.
 * DDI_RESUME is not supported.  Failures unwind via errout1-4 in
 * reverse order of acquisition.
 */
static int
xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	xdb_t *vdp;
	ddi_iblock_cookie_t ibc;
	int instance;

	switch (cmd) {
	case DDI_RESUME:
		return (DDI_FAILURE);
	case DDI_ATTACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_ATTACH */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	vdp = ddi_get_soft_state(xdb_statep, instance);
	vdp->xs_dip = dip;
	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
		goto errout1;

	if (!xdb_kstat_init(vdp))
		goto errout1;

	/* iomutex is shared with the interrupt handler, hence the cookie */
	mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
	cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
	cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);

	ddi_set_driver_private(dip, vdp);

	vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
	    TASKQ_DEFAULTPRI, 0);
	if (vdp->xs_iotaskq == NULL)
		goto errout2;
	(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
	    DDI_SLEEP);

	/* Watch frontend and hotplug state change */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto errout3;
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
	    NULL) != DDI_SUCCESS) {
		goto errout4;
	}

	/*
	 * Kick-off hotplug script
	 */
	if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
		    ddi_get_name_addr(dip));
		goto errout4;
	}

	/*
	 * start waiting for hotplug event and otherend state event
	 * mainly for debugging, frontend will not take any op seeing this
	 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);

errout4:
	xvdi_remove_event_handler(dip, NULL);
errout3:
	/* mark disconnected so the taskq thread exits before we destroy it */
	mutex_enter(&vdp->xs_cbmutex);
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_if_status = XDB_DISCONNECTED;
	cv_broadcast(&vdp->xs_iocv);
	mutex_exit(&vdp->xs_iomutex);
	mutex_exit(&vdp->xs_cbmutex);
	ddi_taskq_destroy(vdp->xs_iotaskq);
errout2:
	ddi_set_driver_private(dip, NULL);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
errout1:
	ddi_soft_state_free(xdb_statep, instance);
	return (DDI_FAILURE);
}

/*
 * detach(9E) entry point.  Refuses to detach while the interface is
 * still connected to a frontend; otherwise tears down event handlers,
 * the hotplug state, the taskq and all per-instance state.
 * DDI_SUSPEND is not supported.
 */
/*ARGSUSED*/
static int
xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	xdb_t *vdp = XDB_INST2SOFTS(instance);

	switch (cmd) {
	case DDI_SUSPEND:
		return (DDI_FAILURE);
	case DDI_DETACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_DETACH handling */

	/* shouldn't detach, if still used by frontend */
	mutex_enter(&vdp->xs_iomutex);
	if (vdp->xs_if_status != XDB_DISCONNECTED) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&vdp->xs_iomutex);

	xvdi_remove_event_handler(dip, NULL);
	/* can do nothing about it, if it fails */
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);

	ddi_taskq_destroy(vdp->xs_iotaskq);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(xdb_statep, instance);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);
}

/*
 * Device operations table.  This driver creates no minor nodes, so
 * there are no cb_ops; only attach/detach do real work.
 */
static struct dev_ops xdb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	ddi_getinfo_1to1,	/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdb_attach,		/* devo_attach */
	xdb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	NULL,			/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module */
	"vbd backend driver",	/* Name of the module */
	&xdb_dev_ops		/* driver ops */
};

static struct modlinkage xdb_modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* Module load: set up the soft-state list, then register the driver. */
int
_init(void)
{
	int rv;

	if ((rv = ddi_soft_state_init((void **)&xdb_statep,
	    sizeof (xdb_t), 0)) == 0)
		/* undo soft-state init if mod_install fails */
		if ((rv = mod_install(&xdb_modlinkage)) != 0)
			ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

/* Module unload: unregister the driver, then free the soft-state list. */
int
_fini(void)
{
	int rv;

	if ((rv = mod_remove(&xdb_modlinkage)) != 0)
		return (rv);
	ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdb_modlinkage, modinfop));
}

/*
 * Pull the next request off the shared blkif ring, converting from the
 * frontend's ABI (native, 32-bit or 64-bit x86 layout) into the native
 * blkif_request_t.  Returns 1 when a request was copied out, 0 when the
 * ring is empty.
 */
static int
xdb_get_request(xdb_t *vdp, blkif_request_t *req)
{
	void *src = xvdi_ring_get_request(vdp->xs_ring);

	if (src == NULL)
		return (0);

	switch (vdp->xs_blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		(void) memcpy(req, src, sizeof (*req));
		break;
	case BLKIF_PROTOCOL_X86_32:
		blkif_get_x86_32_req(req, src);
		break;
	case BLKIF_PROTOCOL_X86_64:
		blkif_get_x86_64_req(req, src);
		break;
	default:
		/* protocol was negotiated earlier; anything else is fatal */
		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
		    ddi_get_name_addr(vdp->xs_dip),
		    vdp->xs_blk_protocol);
	}
	return (1);
}

/*
 * Write a response for request 'id' into the shared ring, using the
 * layout matching the negotiated frontend protocol, and push it so the
 * frontend can see it.  A zero 'status' maps to BLKIF_RSP_OKAY, anything
 * else to BLKIF_RSP_ERROR.  Returns the value of
 * xvdi_ring_push_response() (nonzero presumably means the frontend
 * should be notified -- confirm against xvdi docs).
 */
static int
xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
{
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
	blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring);
	blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp;
	blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp;

	ASSERT(rsp);

	switch (vdp->xs_blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		ddi_put64(acchdl, &rsp->id, id);
		ddi_put8(acchdl, &rsp->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	case BLKIF_PROTOCOL_X86_32:
		ddi_put64(acchdl, &rsp_32->id, id);
		ddi_put8(acchdl, &rsp_32->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp_32->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	case BLKIF_PROTOCOL_X86_64:
		ddi_put64(acchdl, &rsp_64->id, id);
		ddi_put8(acchdl, &rsp_64->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp_64->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	default:
		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
		    ddi_get_name_addr(vdp->xs_dip),
		    vdp->xs_blk_protocol);
	}

	return (xvdi_ring_push_response(vdp->xs_ring));
}

/*
 * Convert a 32-bit-x86-layout request (untrusted, from the ring) to the
 * native layout.  nr_segments is clamped to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST when copying segments, so a malicious
 * count cannot overrun dst->seg[]; note dst->nr_segments itself is
 * copied unclamped -- callers must validate it.
 */
static void
blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->sector_number = src->sector_number;
	if (n > src->nr_segments)
		n = src->nr_segments;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}

/*
 * Convert a 64-bit-x86-layout request to the native layout; same
 * segment-count clamping as blkif_get_x86_32_req() above.
 */
static void
blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->sector_number = src->sector_number;
	if (n > src->nr_segments)
		n = src->nr_segments;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}