/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Note: This is the backend part of the split PV disk driver. This driver
 * is not a nexus driver, nor is it a leaf driver (block/char/stream driver).
 * Currently, it does not create any minor node. So, although it runs in the
 * backend domain, it will not be used directly from within dom0.
 * It simply gets block I/O requests issued by the frontend from a shared page
 * (blkif ring buffer - defined by Xen) between the backend and frontend
 * domains, generates a buf, and pushes it down to the underlying disk target
 * driver via the LDI interface. When the buf is done, this driver generates
 * a response and puts it into the ring buffer to inform the frontend of the
 * status of the I/O request it issued. When a new virtual device entry is
 * added in xenstore, a watch event is sent from Xen to the xvdi framework,
 * which will, in turn, create the devinfo node and try to attach this driver
 * (see xvdi_create_dev). When the frontend peer changes its state to
 * XenbusStateClosed, an event is also sent from Xen to the xvdi framework,
 * which will detach and remove the devinfo node (see i_xvdi_oestate_handler).
 * I/O requests taken from the ring buffer and events coming from xenstore
 * cannot be trusted; we verify them in xdb_get_buf() and
 * xdb_check_state_transition().
 *
 * Virtual device configuration is read from and written to the database via
 * the xenbus_* interfaces. The driver also uses xvdi_* to interact with the
 * hypervisor. There is an on-going effort to make xvdi_* cover all xenbus_*.
 */
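
/*
 * Informal sketch of the request lifecycle handled by this file (editorial
 * summary of the code below, not an interface contract):
 *
 *   frontend pushes a blkif request onto the shared ring
 *     -> event channel fires -> xdb_intr()
 *     -> xdb_get_req()/xdb_get_buf() map the granted pages and build a buf
 *     -> xdb_send_buf() (taskq thread) submits the buf via ldi_strategy()
 *     -> xdb_biodone() unmaps the pages and calls xdb_push_response()
 *     -> xvdi_notify_oe() notifies the frontend over the event channel
 */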

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/list.h>
#include <sys/dkio.h>
#include <sys/cmlb.h>
#include <sys/vtoc.h>
#include <sys/modctl.h>
#include <sys/bootconf.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <sys/gnttab.h>
#include <sys/lofi.h>
#include <io/xdf.h>
#include <xen/io/blkif_impl.h>
#include <io/xdb.h>

static xdb_t *xdb_statep;
static int xdb_debug = 0;

static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
static int xdb_get_request(xdb_t *, blkif_request_t *);
static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);

#ifdef DEBUG
/*
 * debug aid functions
 */

static void
logva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == va)
			debug_enter("VA remapping found!");
	}

	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == 0) {
			page_addrs[i] = va;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}

static void
unlogva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == va) {
			page_addrs[i] = 0;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}

static void
xdb_dump_request_oe(blkif_request_t *req)
{
	int i;

	/*
	 * Exploit the public interface definitions for BLKIF_OP_READ
	 * etc..
	 */
	char *op_name[] = { "read", "write", "barrier", "flush" };

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation]));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d",
	    req->nr_segments));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu",
	    (unsigned long long)req->id));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu",
	    (unsigned long long)req->sector_number));
	for (i = 0; i < req->nr_segments; i++) {
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d,"
		    "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
		    req->seg[i].last_sect));
	}
}
#endif /* DEBUG */

/*
 * Statistics.
 */
static char *xdb_stats[] = {
	"rd_reqs",
	"wr_reqs",
	"br_reqs",
	"fl_reqs",
	"oo_reqs"
};

static int
xdb_kstat_update(kstat_t *ksp, int flag)
{
	xdb_t *vdp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	vdp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * xdb_stats.
	 */
	(knp++)->value.ui64 = vdp->xs_stat_req_reads;
	(knp++)->value.ui64 = vdp->xs_stat_req_writes;
	(knp++)->value.ui64 = vdp->xs_stat_req_barriers;
	(knp++)->value.ui64 = vdp->xs_stat_req_flushes;
	(knp++)->value.ui64 = 0; /* oo_req */

	return (0);
}

static boolean_t
xdb_kstat_init(xdb_t *vdp)
{
	int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]);
	char **cp = xdb_stats;
	kstat_named_t *knp;

	if ((vdp->xs_kstats = kstat_create("xdb",
	    ddi_get_instance(vdp->xs_dip),
	    "req_statistics", "block", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	vdp->xs_kstats->ks_private = vdp;
	vdp->xs_kstats->ks_update = xdb_kstat_update;

	knp = vdp->xs_kstats->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
		knp++;
		cp++;
		nstat--;
	}

	kstat_install(vdp->xs_kstats);

	return (B_TRUE);
}

static int xdb_biodone(buf_t *);

static buf_t *
xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
{
	buf_t *bp;
	uint8_t segs, curseg;
	int sectors;
	int i, err;
	gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	ddi_acc_handle_t acchdl;

	acchdl = vdp->xs_ring_hdl;
	bp = XDB_XREQ2BP(xreq);
	curseg = xreq->xr_curseg;
	/* init a new xdb request */
	if (req != NULL) {
		ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
		boolean_t pagemapok = B_TRUE;
		uint8_t op = ddi_get8(acchdl, &req->operation);

		xreq->xr_vdp = vdp;
		xreq->xr_op = op;
		xreq->xr_id = ddi_get64(acchdl, &req->id);
		segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments);
		if (segs == 0) {
			if (op != BLKIF_OP_FLUSH_DISKCACHE)
				cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE"
				    " is seen from domain %d with zero "
				    "length data buffer!", vdp->xs_peer);
			bioinit(bp);
			bp->b_bcount = 0;
			bp->b_lblkno = 0;
			bp->b_un.b_addr = NULL;
			return (bp);
		} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
			cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
			    " is seen from domain %d with non-zero "
			    "length data buffer!", vdp->xs_peer);
		}

		/*
		 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
		 * according to the definition of the blk interface by Xen;
		 * we do a sanity check here
		 */
		if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
			segs = xreq->xr_buf_pages =
			    BLKIF_MAX_SEGMENTS_PER_REQUEST;

		for (i = 0; i < segs; i++) {
			uint8_t fs, ls;

			mapops[i].host_addr =
			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
			    vdp->xs_iopage_va, xreq->xr_idx, i);
			mapops[i].dom = vdp->xs_peer;
			mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
			mapops[i].flags = GNTMAP_host_map;
			if (op != BLKIF_OP_READ)
				mapops[i].flags |= GNTMAP_readonly;

			fs = ddi_get8(acchdl, &req->seg[i].first_sect);
			ls = ddi_get8(acchdl, &req->seg[i].last_sect);

			/*
			 * first_sect should be no bigger than last_sect and
			 * both of them should be no bigger than
			 * (PAGESIZE / XB_BSIZE - 1) according to the
			 * definition of the blk interface by Xen, so we do
			 * a sanity check again
			 */
			if (fs > (PAGESIZE / XB_BSIZE - 1))
				fs = PAGESIZE / XB_BSIZE - 1;
			if (ls > (PAGESIZE / XB_BSIZE - 1))
				ls = PAGESIZE / XB_BSIZE - 1;
			if (fs > ls)
				fs = ls;

			xreq->xr_segs[i].fs = fs;
			xreq->xr_segs[i].ls = ls;
		}

		/* map in io pages */
		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
		    mapops, i);
		if (err != 0)
			return (NULL);
		for (i = 0; i < segs; i++) {
			/*
			 * Although HYPERVISOR_grant_table_op() returned no
			 * error, the mapping of each single page can fail.
			 * So, we have to do the check here and handle the
			 * error if needed
			 */
			if (mapops[i].status != GNTST_okay) {
				int j;
				for (j = 0; j < i; j++) {
#ifdef DEBUG
					unlogva(vdp, mapops[j].host_addr);
#endif
					xen_release_pfn(
					    xreq->xr_plist[j].p_pagenum);
				}
				pagemapok = B_FALSE;
				break;
			}
			/* record page mapping handle for unmapping later */
			xreq->xr_page_hdls[i] = mapops[i].handle;
#ifdef DEBUG
			logva(vdp, mapops[i].host_addr);
#endif
			/*
			 * Pass the MFNs down using the shadow list (xr_pplist)
			 *
			 * This is pretty ugly since we have implicit knowledge
			 * of how the rootnex binds buffers.
			 * The GNTTABOP_map_grant_ref op makes us do some ugly
			 * stuff since we're not allowed to touch these PTEs
			 * from the VM.
			 *
			 * Obviously, these aren't real page_t's. The rootnex
			 * only needs p_pagenum.
			 * Also, don't use btop() here or 32 bit PAE breaks.
			 */
			xreq->xr_pplist[i] = &xreq->xr_plist[i];
			xreq->xr_plist[i].p_pagenum =
			    xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT);
		}

		/*
		 * not all pages were mapped in successfully; unmap the
		 * mapped-in pages and return failure
		 */
		if (!pagemapok) {
			gnttab_unmap_grant_ref_t unmapop;

			for (i = 0; i < segs; i++) {
				if (mapops[i].status != GNTST_okay)
					continue;
				unmapop.host_addr =
				    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
				    vdp->xs_iopage_va, xreq->xr_idx, i);
				unmapop.dev_bus_addr = NULL;
				unmapop.handle = mapops[i].handle;
				(void) HYPERVISOR_grant_table_op(
				    GNTTABOP_unmap_grant_ref, &unmapop, 1);
			}

			return (NULL);
		}
		bioinit(bp);
		bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
		    BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
	} else {
		uint64_t blkst;
		int isread;

		/* reuse this buf */
		blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
		isread = bp->b_flags & B_READ;
		bioreset(bp);
		bp->b_lblkno = blkst;
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
		    xreq->xr_idx));
	}

	/* form a buf */
	bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
	    curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
	bp->b_shadow = &xreq->xr_pplist[curseg];
	bp->b_iodone = xdb_biodone;
	sectors = 0;
	for (i = curseg; i < xreq->xr_buf_pages; i++) {
		/*
		 * Only the first seg's xreq->xr_segs[i].fs may be non-zero;
		 * otherwise, we'll break the request into multiple bufs
		 */
		if ((i != curseg) && (xreq->xr_segs[i].fs != 0)) {
			break;
		}
		sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1);
	}
	xreq->xr_curseg = i;
	bp->b_bcount = sectors * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;

	return (bp);
}

static xdb_request_t *
xdb_get_req(xdb_t *vdp)
{
	xdb_request_t *req;
	int idx;

	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
	ASSERT(vdp->xs_free_req != -1);
	req = &vdp->xs_req[vdp->xs_free_req];
	vdp->xs_free_req = req->xr_next;
	idx = req->xr_idx;
	bzero(req, sizeof (xdb_request_t));
	req->xr_idx = idx;
	return (req);
}

static void
xdb_free_req(xdb_request_t *req)
{
	xdb_t *vdp = req->xr_vdp;

	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
	req->xr_next = vdp->xs_free_req;
	vdp->xs_free_req = req->xr_idx;
}

static void
xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
{
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;

	if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id),
	    ddi_get8(acchdl, &req->operation), ok))
		xvdi_notify_oe(vdp->xs_dip);
}

static void
xdb_init_ioreqs(xdb_t *vdp)
{
	int i;

	ASSERT(vdp->xs_nentry);

	if (vdp->xs_req == NULL)
		vdp->xs_req = kmem_alloc(vdp->xs_nentry *
		    sizeof (xdb_request_t), KM_SLEEP);
#ifdef DEBUG
	if (vdp->page_addrs == NULL)
		vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) *
		    sizeof (uint64_t), KM_SLEEP);
#endif
	for (i = 0; i < vdp->xs_nentry; i++) {
		vdp->xs_req[i].xr_idx = i;
		vdp->xs_req[i].xr_next = i + 1;
	}
	vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1;
	vdp->xs_free_req = 0;

	/* alloc va in host dom for io page mapping */
	vdp->xs_iopage_va = vmem_xalloc(heap_arena,
	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
	    VM_SLEEP);
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
		hat_prepare_mapping(kas.a_hat,
		    vdp->xs_iopage_va + i * PAGESIZE);
}

static void
xdb_uninit_ioreqs(xdb_t *vdp)
{
	int i;

	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
		hat_release_mapping(kas.a_hat,
		    vdp->xs_iopage_va + i * PAGESIZE);
	vmem_xfree(heap_arena, vdp->xs_iopage_va,
	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
	if (vdp->xs_req != NULL) {
		kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t));
		vdp->xs_req = NULL;
	}
#ifdef DEBUG
	if (vdp->page_addrs != NULL) {
		kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
		    sizeof (uint64_t));
		vdp->page_addrs = NULL;
	}
#endif
}
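
/*
 * Event channel interrupt handler. It drains every pending request from the
 * shared ring: supported operations are turned into bufs via xdb_get_buf()
 * and queued on the xs_f_iobuf list for the xdb_send_buf() taskq thread to
 * submit, while unsupported operations are failed immediately with an error
 * response.
 */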
static uint_t
xdb_intr(caddr_t arg)
{
	blkif_request_t req;
	blkif_request_t *reqp = &req;
	xdb_request_t *xreq;
	buf_t *bp;
	uint8_t op;
	xdb_t *vdp = (xdb_t *)arg;
	int ret = DDI_INTR_UNCLAIMED;
	dev_info_t *dip = vdp->xs_dip;

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
	    "xdb@%s: I/O request received from dom %d",
	    ddi_get_name_addr(dip), vdp->xs_peer));

	mutex_enter(&vdp->xs_iomutex);

	/* shouldn't touch ring buffer if not in connected state */
	if (vdp->xs_if_status != XDB_CONNECTED) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_INTR_UNCLAIMED);
	}

	/*
	 * We'll loop until there are no more requests in the ring.
	 * We won't be stuck in this loop forever, since the size of the ring
	 * buffer is limited and the frontend will stop pushing requests into
	 * it when the ring buffer is full
	 */

	/* req_event will be increased in xvdi_ring_get_request() */
	while (xdb_get_request(vdp, reqp)) {
		ret = DDI_INTR_CLAIMED;

		op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
		if (op == BLKIF_OP_READ ||
		    op == BLKIF_OP_WRITE ||
		    op == BLKIF_OP_WRITE_BARRIER ||
		    op == BLKIF_OP_FLUSH_DISKCACHE) {
#ifdef DEBUG
			xdb_dump_request_oe(reqp);
#endif
			xreq = xdb_get_req(vdp);
			ASSERT(xreq);
			switch (op) {
			case BLKIF_OP_READ:
				vdp->xs_stat_req_reads++;
				break;
			case BLKIF_OP_WRITE_BARRIER:
				vdp->xs_stat_req_barriers++;
				/* FALLTHRU */
			case BLKIF_OP_WRITE:
				vdp->xs_stat_req_writes++;
				break;
			case BLKIF_OP_FLUSH_DISKCACHE:
				vdp->xs_stat_req_flushes++;
				break;
			}

			xreq->xr_curseg = 0; /* start from first segment */
			bp = xdb_get_buf(vdp, reqp, xreq);
			if (bp == NULL) {
				/* failed to form a buf */
				xdb_free_req(xreq);
				xdb_response(vdp, reqp, B_FALSE);
				continue;
			}
			bp->av_forw = NULL;

			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
			    " buf %p, blkno %lld, size %lu, addr %p",
			    (void *)bp, (longlong_t)bp->b_blkno,
			    (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));

			/* send bp to underlying blk driver */
			if (vdp->xs_f_iobuf == NULL) {
				vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
			} else {
				vdp->xs_l_iobuf->av_forw = bp;
				vdp->xs_l_iobuf = bp;
			}
		} else {
			xdb_response(vdp, reqp, B_FALSE);
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "Unsupported cmd received from dom %d",
			    ddi_get_name_addr(dip), vdp->xs_peer));
		}
	}
	/* notify our taskq to push buf to underlying blk driver */
	if (ret == DDI_INTR_CLAIMED)
		cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	return (ret);
}

static int
xdb_biodone(buf_t *bp)
{
	int i, err, bioerr;
	uint8_t segs;
	gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	xdb_request_t *xreq = XDB_BP2XREQ(bp);
	xdb_t *vdp = xreq->xr_vdp;
	buf_t *nbp;

	bioerr = geterror(bp);
	if (bioerr)
		XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
		    ddi_get_name_addr(vdp->xs_dip), bioerr));

	/* check if we are done w/ this I/O request */
	if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
		nbp = xdb_get_buf(vdp, NULL, xreq);
		if (nbp) {
			err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
			if (err == 0) {
				XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
				    "sent buf to backend ok"));
				return (DDI_SUCCESS);
			}
			bioerr = EIO;
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "sent buf to backend dev failed, err=%d",
			    ddi_get_name_addr(vdp->xs_dip), err));
		} else {
			bioerr = EIO;
		}
	}
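
	/*
	 * We are done with the granted pages for this request, whether it
	 * succeeded or not. Unmap them before the response is pushed back,
	 * so that the frontend can reclaim its grant references once it
	 * sees the response.
	 */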
	/* unmap io pages */
	segs = xreq->xr_buf_pages;
	/*
	 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
	 * according to the definition of the blk interface by Xen
	 */
	ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
	for (i = 0; i < segs; i++) {
		unmapops[i].host_addr =
		    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
		    vdp->xs_iopage_va, xreq->xr_idx, i);
#ifdef DEBUG
		mutex_enter(&vdp->xs_iomutex);
		unlogva(vdp, unmapops[i].host_addr);
		mutex_exit(&vdp->xs_iomutex);
#endif
		unmapops[i].dev_bus_addr = NULL;
		unmapops[i].handle = xreq->xr_page_hdls[i];
	}
	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    unmapops, segs);
	ASSERT(!err);

	/*
	 * If we have reached a barrier write or a cache flush, then we must
	 * flush all our I/Os.
	 */
	if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
	    xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
		/*
		 * XXX At this point the write did succeed, so I don't
		 * believe we should report an error because the flush
		 * failed. However, this is a debatable point, so
		 * maybe we need to think more carefully about this.
		 * For now, just cast to void.
		 */
		(void) ldi_ioctl(vdp->xs_ldi_hdl,
		    DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL);
	}

	mutex_enter(&vdp->xs_iomutex);

	/* send response back to frontend */
	if (vdp->xs_if_status == XDB_CONNECTED) {
		if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
			xvdi_notify_oe(vdp->xs_dip);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
		    "sent resp back to frontend, id=%llu",
		    (unsigned long long)xreq->xr_id));
	}
	/* free io resources */
	biofini(bp);
	xdb_free_req(xreq);

	vdp->xs_ionum--;
	if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) {
		/* we're closing, someone is waiting for I/O clean-up */
		cv_signal(&vdp->xs_ionumcv);
	}

	mutex_exit(&vdp->xs_iomutex);

	return (DDI_SUCCESS);
}

static int
xdb_bindto_frontend(xdb_t *vdp)
{
	int err;
	char *oename;
	grant_ref_t gref;
	evtchn_port_t evtchn;
	dev_info_t *dip = vdp->xs_dip;
	char protocol[64] = "";

	/*
	 * Gather info from frontend
	 */
	oename = xvdi_get_oename(dip);
	if (oename == NULL)
		return (DDI_FAILURE);

	err = xenbus_gather(XBT_NULL, oename,
	    "ring-ref", "%lu", &gref, "event-channel", "%u", &evtchn, NULL);
	if (err != 0) {
		xvdi_fatal_error(dip, err,
		    "Getting ring-ref and evtchn from frontend");
		return (DDI_FAILURE);
	}

	vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE;
	vdp->xs_nentry = BLKIF_RING_SIZE;
	vdp->xs_entrysize = sizeof (union blkif_sring_entry);

	/*
	 * The frontend may be a 32-bit or a 64-bit guest, and the ring entry
	 * layouts differ between these ABIs. Check which protocol the
	 * frontend advertises and adjust the ring geometry accordingly; the
	 * defaults set above assume the native ABI.
	 */
	err = xenbus_gather(XBT_NULL, oename,
	    "protocol", "%63s", protocol, NULL);
	if (err)
		(void) strcpy(protocol, "unspecified, assuming native");
	else {
		/*
		 * We must check for NATIVE first, so that the fast path
		 * is taken for copying data from the guest to the host.
		 */
		if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) {
			if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32;
				vdp->xs_nentry = BLKIF_X86_32_RING_SIZE;
				vdp->xs_entrysize =
				    sizeof (union blkif_x86_32_sring_entry);
			} else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) ==
			    0) {
				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64;
				vdp->xs_nentry = BLKIF_X86_64_RING_SIZE;
				vdp->xs_entrysize =
				    sizeof (union blkif_x86_64_sring_entry);
			} else {
				xvdi_fatal_error(dip, err, "unknown protocol");
				return (DDI_FAILURE);
			}
		}
	}
#ifdef DEBUG
	cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ",
	    ddi_get_name_addr(dip), protocol);
#endif

	/*
	 * map and init ring
	 *
	 * The ring parameters must match those which have been allocated
	 * in the front end.
	 */
	err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
	    gref, &vdp->xs_ring);
	if (err != DDI_SUCCESS)
		return (DDI_FAILURE);
	/*
	 * This will be removed after we use shadow I/O ring requests, since
	 * we don't need to access the ring itself directly, thus the access
	 * handle is not needed
	 */
	vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;

	/*
	 * bind event channel
	 */
	err = xvdi_bind_evtchn(dip, evtchn);
	if (err != DDI_SUCCESS) {
		xvdi_unmap_ring(vdp->xs_ring);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static void
xdb_unbindfrom_frontend(xdb_t *vdp)
{
	xvdi_free_evtchn(vdp->xs_dip);
	xvdi_unmap_ring(vdp->xs_ring);
}

#define	LOFI_CTRL_NODE	"/dev/lofictl"
#define	LOFI_DEV_NODE	"/devices/pseudo/lofi@0:"
#define	LOFI_MODE	FREAD | FWRITE | FEXCL

static int
xdb_setup_node(xdb_t *vdp, char *path)
{
	dev_info_t *dip;
	char *xsnode, *node;
	ldi_handle_t ldi_hdl;
	struct lofi_ioctl *li;
	int minor;
	int err;
	unsigned int len;

	dip = vdp->xs_dip;
	xsnode = xvdi_get_xsname(dip);
	if (xsnode == NULL)
		return (DDI_FAILURE);

	err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path",
	    (void **)&node, &len);
	if (err == ENOENT)
		err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node,
		    &len);
	if (err != 0) {
		xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'");
		return (DDI_FAILURE);
	}

	if (!XDB_IS_LOFI(vdp)) {
		(void) strlcpy(path, node, MAXPATHLEN + 1);
		kmem_free(node, len);
		return (DDI_SUCCESS);
	}

	do {
		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
		    &ldi_hdl, vdp->xs_ldi_li);
	} while (err == EBUSY);
	if (err != 0) {
		kmem_free(node, len);
		return (DDI_FAILURE);
	}

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, node, MAXPATHLEN + 1);
	kmem_free(node, len);
	if (ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
	    LOFI_MODE | FKIOCTL, kcred, &minor) != 0) {
		cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
		    ddi_get_name_addr(dip), li->li_filename);
		(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
		kmem_free(li, sizeof (*li));
		return (DDI_FAILURE);
	}
	/*
	 * return '/devices/...' instead of '/dev/lofi/...' since the
	 * former is available immediately after calling ldi_ioctl
	 */
	(void) snprintf(path, MAXPATHLEN + 1, LOFI_DEV_NODE "%d", minor);
	(void) xenbus_printf(XBT_NULL, xsnode, "node", "%s", path);
	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
	kmem_free(li, sizeof (*li));
	return (DDI_SUCCESS);
}

static void
xdb_teardown_node(xdb_t *vdp)
{
	dev_info_t *dip;
	char *xsnode, *node;
	ldi_handle_t ldi_hdl;
	struct lofi_ioctl *li;
	int err;
	unsigned int len;

	if (!XDB_IS_LOFI(vdp))
		return;

	dip = vdp->xs_dip;
	xsnode = xvdi_get_xsname(dip);
	if (xsnode == NULL)
		return;

	err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path",
	    (void **)&node, &len);
	if (err == ENOENT)
		err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node,
		    &len);
	if (err != 0) {
		xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'");
		return;
	}

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, node, MAXPATHLEN + 1);
	kmem_free(node, len);

	do {
		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
		    &ldi_hdl, vdp->xs_ldi_li);
	} while (err == EBUSY);

	if (err != 0) {
		kmem_free(li, sizeof (*li));
		return;
	}

	if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li,
	    LOFI_MODE | FKIOCTL, kcred, NULL) != 0) {
		cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s",
		    ddi_get_name_addr(dip), li->li_filename);
	}

	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
	kmem_free(li, sizeof (*li));
}

static int
xdb_open_device(xdb_t *vdp)
{
	uint64_t devsize;
	dev_info_t *dip;
	char *xsnode;
	char *nodepath;
	char *mode = NULL;
	char *type = NULL;
	int err;

	dip = vdp->xs_dip;
	xsnode = xvdi_get_xsname(dip);
	if (xsnode == NULL)
		return (DDI_FAILURE);

	err = xenbus_gather(XBT_NULL, xsnode,
	    "mode", NULL, &mode, "type", NULL, &type, NULL);
	if (err != 0) {
		if (mode)
			kmem_free(mode, strlen(mode) + 1);
		if (type)
			kmem_free(type, strlen(type) + 1);
		xvdi_fatal_error(dip, err,
		    "Getting mode and type from backend device");
		return (DDI_FAILURE);
	}
	if (strcmp(type, "file") == 0) {
		vdp->xs_type |= XDB_DEV_LOFI;
	}
	kmem_free(type, strlen(type) + 1);
	if ((strcmp(mode, "r") == 0) || (strcmp(mode, "ro") == 0)) {
		vdp->xs_type |= XDB_DEV_RO;
	}
	kmem_free(mode, strlen(mode) + 1);

	/*
	 * try to open backend device
	 */
	if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
		return (DDI_FAILURE);

	nodepath = kmem_zalloc(MAXPATHLEN + 1, KM_SLEEP);
	err = xdb_setup_node(vdp, nodepath);
	if (err != DDI_SUCCESS) {
		xvdi_fatal_error(dip, err,
		    "Getting device path of backend device");
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN + 1);
		return (DDI_FAILURE);
	}
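
	/*
	 * nodepath now holds a /devices path for the backing store: either
	 * the physical device named in xenstore, or the lofi device that
	 * xdb_setup_node() created for a file-backed disk. Open it through
	 * LDI, read-only if the disk was exported read-only.
	 */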
	if (ldi_open_by_name(nodepath,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
	    kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
		    ddi_get_name_addr(dip), nodepath);
		kmem_free(nodepath, MAXPATHLEN + 1);
		return (DDI_FAILURE);
	}

	/* check if it's a CD/DVD disc */
	if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
	    "inquiry-device-type", DTYPE_DIRECT) == DTYPE_RODIRECT)
		vdp->xs_type |= XDB_DEV_CD;
	/* check if it's a removable disk */
	if (ldi_prop_exists(vdp->xs_ldi_hdl,
	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "removable-media"))
		vdp->xs_type |= XDB_DEV_RMB;

	if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
		(void) ldi_close(vdp->xs_ldi_hdl,
		    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN + 1);
		return (DDI_FAILURE);
	}
	vdp->xs_sectors = devsize / XB_BSIZE;

	kmem_free(nodepath, MAXPATHLEN + 1);
	return (DDI_SUCCESS);
}

static void
xdb_close_device(xdb_t *vdp)
{
	(void) ldi_close(vdp->xs_ldi_hdl,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
	xdb_teardown_node(vdp);
	ldi_ident_release(vdp->xs_ldi_li);
	vdp->xs_ldi_li = NULL;
	vdp->xs_ldi_hdl = NULL;
}

/*
 * Kick off the connect process.
 * If xs_fe_status == XDB_FE_READY and xs_dev_status == XDB_DEV_READY,
 * xs_if_status will be changed to XDB_CONNECTED on success;
 * otherwise, xs_if_status will not be changed
 */
static int
xdb_start_connect(xdb_t *vdp)
{
	uint32_t dinfo;
	xenbus_transaction_t xbt;
	int err, svdst;
	char *xsnode;
	dev_info_t *dip = vdp->xs_dip;
	char *barrier;
	uint_t len;

	/*
	 * Start connecting to the frontend only when the backend device is
	 * ready and the frontend has moved to XenbusStateInitialised, which
	 * means it is ready to connect
	 */
	ASSERT((vdp->xs_fe_status == XDB_FE_READY) &&
	    (vdp->xs_dev_status == XDB_DEV_READY));

	if (((xsnode = xvdi_get_xsname(dip)) == NULL) ||
	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1) ||
	    (xdb_open_device(vdp) != DDI_SUCCESS))
		return (DDI_FAILURE);

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);

	if (xdb_bindto_frontend(vdp) != DDI_SUCCESS)
		goto errout1;

	/* init i/o requests */
	xdb_init_ioreqs(vdp);

	if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
	    != DDI_SUCCESS)
		goto errout2;

	/*
	 * we can receive an intr any time from now on,
	 * so mark that we're ready to take intrs
	 */
	mutex_enter(&vdp->xs_iomutex);
	/*
	 * save the old status in case we need to restore it when
	 * we fail to write to xenstore later
	 */
	svdst = vdp->xs_if_status;
	vdp->xs_if_status = XDB_CONNECTED;
	mutex_exit(&vdp->xs_iomutex);

	/* write into xenstore the info needed by frontend */
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		xvdi_fatal_error(dip, EIO, "transaction start");
		goto errout3;
	}

	/*
	 * Within this transaction, publish everything the frontend reads at
	 * connect time: feature-barrier (so it knows BLKIF_OP_WRITE_BARRIER
	 * is supported), the disk attributes in 'info', the sector size and
	 * count, and the instance number, then switch to
	 * XenbusStateConnected.
	 */

	/*
	 * If feature-barrier isn't present in xenstore, add it.
	 */
	if (xenbus_read(xbt, xsnode, "feature-barrier",
	    (void **)&barrier, &len) != 0) {
		if ((err = xenbus_printf(xbt, xsnode, "feature-barrier",
		    "%d", 1)) != 0) {
			cmn_err(CE_WARN, "xdb@%s: failed to write "
			    "'feature-barrier'", ddi_get_name_addr(dip));
			xvdi_fatal_error(dip, err, "writing 'feature-barrier'");
			goto abort_trans;
		}
	} else
		kmem_free(barrier, len);

	dinfo = 0;
	if (XDB_IS_RO(vdp))
		dinfo |= VDISK_READONLY;
	if (XDB_IS_CD(vdp))
		dinfo |= VDISK_CDROM;
	if (XDB_IS_RMB(vdp))
		dinfo |= VDISK_REMOVABLE;
	if (err = xenbus_printf(xbt, xsnode, "info", "%u", dinfo)) {
		xvdi_fatal_error(dip, err, "writing 'info'");
		goto abort_trans;
	}

	/* hard-coded 512-byte sector size */
	if (err = xenbus_printf(xbt, xsnode, "sector-size", "%u", DEV_BSIZE)) {
		xvdi_fatal_error(dip, err, "writing 'sector-size'");
		goto abort_trans;
	}

	if (err = xenbus_printf(xbt, xsnode, "sectors", "%"PRIu64,
	    vdp->xs_sectors)) {
		xvdi_fatal_error(dip, err, "writing 'sectors'");
		goto abort_trans;
	}

	if (err = xenbus_printf(xbt, xsnode, "instance", "%d",
	    ddi_get_instance(dip))) {
		xvdi_fatal_error(dip, err, "writing 'instance'");
		goto abort_trans;
	}

	if ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0) {
		xvdi_fatal_error(dip, err, "writing 'state'");
		goto abort_trans;
	}

	if (err = xenbus_transaction_end(xbt, 0)) {
		if (err == EAGAIN)
			/* transaction is ended, don't need to abort it */
			goto trans_retry;
		xvdi_fatal_error(dip, err, "completing transaction");
		goto errout3;
	}

	return (DDI_SUCCESS);

abort_trans:
	(void) xenbus_transaction_end(xbt, 1);
errout3:
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_if_status = svdst;
	mutex_exit(&vdp->xs_iomutex);
	ddi_remove_intr(dip, 0, NULL);
errout2:
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
errout1:
	xdb_close_device(vdp);
	return (DDI_FAILURE);
}

/*
 * Kick off the disconnect process.
 * xs_if_status will not be changed
 */
static int
xdb_start_disconnect(xdb_t *vdp)
{
	/*
	 * Kick off the disconnect process
	 */
	if (xvdi_switch_state(vdp->xs_dip, XBT_NULL, XenbusStateClosing) > 0)
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * Disconnect from the frontend and close the backend device.
 * xs_if_status will be changed to XDB_DISCONNECTED and the
 * Xenbus state will be changed to XenbusStateClosed
 */
static void
xdb_close(dev_info_t *dip)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	mutex_enter(&vdp->xs_iomutex);

	if (vdp->xs_if_status != XDB_CONNECTED) {
		vdp->xs_if_status = XDB_DISCONNECTED;
		cv_broadcast(&vdp->xs_iocv);
		mutex_exit(&vdp->xs_iomutex);
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		return;
	}
	vdp->xs_if_status = XDB_DISCONNECTED;
	cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	/* stop accepting I/O requests from frontend */
	ddi_remove_intr(dip, 0, NULL);
	/* clear all on-going I/Os, if any */
	mutex_enter(&vdp->xs_iomutex);
	while (vdp->xs_ionum > 0)
		cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
	mutex_exit(&vdp->xs_iomutex);

	/* clean up resources and close this interface */
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
	xdb_close_device(vdp);
	vdp->xs_peer = (domid_t)-1;
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
}

/*
 * xdb_check_state_transition() checks the XenbusState change to see
 * whether the change is a valid transition or not.
 * The new state is written by the frontend domain, or by running
 * xenstore-write to change it manually in dom0
 */
static int
xdb_check_state_transition(xdb_t *vdp, XenbusState oestate)
{
	enum xdb_state status;
	int stcheck;
#define	STOK	0 /* needs further processing */
#define	STNOP	1 /* no action needs to be taken */
#define	STBUG	2 /* unexpected state change, could be a bug */

	status = vdp->xs_if_status;
	stcheck = STOK;

	switch (status) {
	case XDB_UNKNOWN:
		if (vdp->xs_fe_status == XDB_FE_UNKNOWN) {
			if ((oestate == XenbusStateUnknown) ||
			    (oestate == XenbusStateConnected))
				stcheck = STBUG;
			else if ((oestate == XenbusStateInitialising) ||
			    (oestate == XenbusStateInitWait))
				stcheck = STNOP;
		} else {
			if ((oestate == XenbusStateUnknown) ||
			    (oestate == XenbusStateInitialising) ||
			    (oestate == XenbusStateInitWait) ||
			    (oestate == XenbusStateConnected))
				stcheck = STBUG;
			else if (oestate == XenbusStateInitialised)
				stcheck = STNOP;
		}
		break;
	case XDB_CONNECTED:
		if ((oestate == XenbusStateUnknown) ||
		    (oestate == XenbusStateInitialising) ||
		    (oestate == XenbusStateInitWait) ||
		    (oestate == XenbusStateInitialised))
			stcheck = STBUG;
		else if (oestate == XenbusStateConnected)
			stcheck = STNOP;
		break;
	case XDB_DISCONNECTED:
	default:
		stcheck = STBUG;
	}

	if (stcheck == STOK)
		return (DDI_SUCCESS);

	if (stcheck == STBUG)
		cmn_err(CE_NOTE, "xdb@%s: unexpected otherend "
		    "state change to %d!, when status is %d",
		    ddi_get_name_addr(vdp->xs_dip), oestate, status);

	return (DDI_FAILURE);
}

static void
xdb_send_buf(void *arg)
{
	buf_t *bp;
	xdb_t *vdp = (xdb_t *)arg;

	mutex_enter(&vdp->xs_iomutex);

	while (vdp->xs_if_status != XDB_DISCONNECTED) {
		while ((bp = vdp->xs_f_iobuf) != NULL) {
			vdp->xs_f_iobuf = bp->av_forw;
			bp->av_forw = NULL;
			vdp->xs_ionum++;
			mutex_exit(&vdp->xs_iomutex);
			if (bp->b_bcount != 0) {
				int err = ldi_strategy(vdp->xs_ldi_hdl, bp);
				if (err != 0) {
					bp->b_flags |= B_ERROR;
					(void) xdb_biodone(bp);
					XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
					    "xdb@%s: sent buf to backend dev"
					    " failed, err=%d",
					    ddi_get_name_addr(vdp->xs_dip),
					    err));
				} else {
					XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
					    "sent buf to backend ok"));
				}
			} else /* no I/O needs to be done */
				(void) xdb_biodone(bp);

			mutex_enter(&vdp->xs_iomutex);
		}

		if (vdp->xs_if_status != XDB_DISCONNECTED)
			cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
	}

	mutex_exit(&vdp->xs_iomutex);
}
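
/*
 * Hotplug (XS_HP_STATE) event callback. Once the hotplug script reports
 * Connected, the backend device is marked ready and, if the frontend has
 * already reached XenbusStateInitialised, connection setup is kicked off.
 */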
/*ARGSUSED*/
static void
xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
    void *impl_data)
{
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
	    "hotplug status change to %d!", ddi_get_name_addr(dip),
	    state));

	mutex_enter(&vdp->xs_cbmutex);
	if (state == Connected) {
		/* Hotplug script has completed successfully */
		if (vdp->xs_dev_status == XDB_DEV_UNKNOWN) {
			vdp->xs_dev_status = XDB_DEV_READY;
			if (vdp->xs_fe_status == XDB_FE_READY)
				/* try to connect to frontend */
				if (xdb_start_connect(vdp) != DDI_SUCCESS)
					(void) xdb_start_disconnect(vdp);
		}
	}
	mutex_exit(&vdp->xs_cbmutex);
}

/*ARGSUSED*/
static void
xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
    void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
	    "otherend state change to %d!", ddi_get_name_addr(dip), new_state));

	mutex_enter(&vdp->xs_cbmutex);

	if (xdb_check_state_transition(vdp, new_state) == DDI_FAILURE) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	switch (new_state) {
	case XenbusStateInitialised:
		ASSERT(vdp->xs_if_status == XDB_UNKNOWN);

		/* frontend is ready for connecting */
		vdp->xs_fe_status = XDB_FE_READY;

		if (vdp->xs_dev_status == XDB_DEV_READY)
			if (xdb_start_connect(vdp) != DDI_SUCCESS)
				(void) xdb_start_disconnect(vdp);
		break;
	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
		break;
	case XenbusStateClosed:
		/* clean up */
		xdb_close(dip);
	}

	mutex_exit(&vdp->xs_cbmutex);
}

static int
xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	xdb_t *vdp;
	ddi_iblock_cookie_t ibc;
	int instance;

	switch (cmd) {
	case DDI_RESUME:
		return (DDI_FAILURE);
	case DDI_ATTACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_ATTACH */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	vdp = ddi_get_soft_state(xdb_statep, instance);
	vdp->xs_dip = dip;
	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
		goto errout1;

	if (!xdb_kstat_init(vdp))
		goto errout1;

	mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
	cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
	cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);

	ddi_set_driver_private(dip, vdp);

	vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
	    TASKQ_DEFAULTPRI, 0);
	if (vdp->xs_iotaskq == NULL)
		goto errout2;
	(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
	    DDI_SLEEP);

	/* Watch frontend and hotplug state change */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change) !=
	    DDI_SUCCESS)
		goto errout3;
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change) !=
	    DDI_SUCCESS) {
		goto errout4;
	}

	/*
	 * Kick off the hotplug script
	 */
	if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
		    ddi_get_name_addr(dip));
		goto errout4;
	}

	/*
	 * Start waiting for hotplug events and otherend state events.
	 * This is mainly for debugging; the frontend will not take any
	 * action upon seeing this state.
	 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);

errout4:
	xvdi_remove_event_handler(dip, NULL);
errout3:
	mutex_enter(&vdp->xs_cbmutex);
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_if_status = XDB_DISCONNECTED;
	cv_broadcast(&vdp->xs_iocv);
	mutex_exit(&vdp->xs_iomutex);
	mutex_exit(&vdp->xs_cbmutex);
	ddi_taskq_destroy(vdp->xs_iotaskq);
errout2:
	ddi_set_driver_private(dip, NULL);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
errout1:
	ddi_soft_state_free(xdb_statep, instance);
	return (DDI_FAILURE);
}

/*ARGSUSED*/
static int
xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	xdb_t *vdp = XDB_INST2SOFTS(instance);

	switch (cmd) {
	case DDI_SUSPEND:
		return (DDI_FAILURE);
	case DDI_DETACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_DETACH handling */

	/* shouldn't detach, if still used by frontend */
	mutex_enter(&vdp->xs_iomutex);
	if (vdp->xs_if_status != XDB_DISCONNECTED) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&vdp->xs_iomutex);

	xvdi_remove_event_handler(dip, NULL);
	/* can do nothing about it, if it fails */
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);

	ddi_taskq_destroy(vdp->xs_iotaskq);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(xdb_statep, instance);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);
}

static struct dev_ops xdb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	ddi_getinfo_1to1,	/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdb_attach,		/* devo_attach */
	xdb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	NULL,			/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. */
	"vbd backend driver",	/* Name of the module */
	&xdb_dev_ops		/* driver ops */
};

static struct modlinkage xdb_modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int rv;

	if ((rv = ddi_soft_state_init((void **)&xdb_statep,
	    sizeof (xdb_t), 0)) == 0)
		if ((rv = mod_install(&xdb_modlinkage)) != 0)
			ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

int
_fini(void)
{
	int rv;

	if ((rv = mod_remove(&xdb_modlinkage)) != 0)
		return (rv);
	ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdb_modlinkage, modinfop));
}

static int
xdb_get_request(xdb_t *vdp, blkif_request_t *req)
{
	void *src = xvdi_ring_get_request(vdp->xs_ring);

	if (src == NULL)
		return (0);

	switch (vdp->xs_blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		(void) memcpy(req, src, sizeof (*req));
		break;
	case BLKIF_PROTOCOL_X86_32:
		blkif_get_x86_32_req(req, src);
		break;
	case BLKIF_PROTOCOL_X86_64:
		blkif_get_x86_64_req(req, src);
		break;
	default:
		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
		    ddi_get_name_addr(vdp->xs_dip),
		    vdp->xs_blk_protocol);
	}
	return (1);
}

static int
xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
{
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
	blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring);
	blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp;
	blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp;

	ASSERT(rsp);

	switch (vdp->xs_blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		ddi_put64(acchdl, &rsp->id, id);
		ddi_put8(acchdl, &rsp->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	case BLKIF_PROTOCOL_X86_32:
		ddi_put64(acchdl, &rsp_32->id, id);
		ddi_put8(acchdl, &rsp_32->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp_32->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	case BLKIF_PROTOCOL_X86_64:
		ddi_put64(acchdl, &rsp_64->id, id);
		ddi_put8(acchdl, &rsp_64->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp_64->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	default:
		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
		    ddi_get_name_addr(vdp->xs_dip),
		    vdp->xs_blk_protocol);
	}

	return (xvdi_ring_push_response(vdp->xs_ring));
}

static void
blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;

	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->sector_number = src->sector_number;
	if (n > src->nr_segments)
		n = src->nr_segments;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}

static void
blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;

	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->sector_number = src->sector_number;
	if (n > src->nr_segments)
		n = src->nr_segments;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}