1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Note: This is the backend part of the split PV disk driver. This driver 29 * is not a nexus driver, nor is it a leaf driver(block/char/stream driver). 30 * Currently, it does not create any minor node. So, although, it runs in 31 * backend domain, it will not be used directly from within dom0. 32 * It simply gets block I/O requests issued by frontend from a shared page 33 * (blkif ring buffer - defined by Xen) between backend and frontend domain, 34 * generates a buf, and push it down to underlying disk target driver via 35 * ldi interface. When buf is done, this driver will generate a response 36 * and put it into ring buffer to inform frontend of the status of the I/O 37 * request issued by it. When a new virtual device entry is added in xenstore, 38 * there will be an watch event sent from Xen to xvdi framework, who will, 39 * in turn, create the devinfo node and try to attach this driver 40 * (see xvdi_create_dev). When frontend peer changes its state to 41 * XenbusStateClose, an event will also be sent from Xen to xvdi framework, 42 * who will detach and remove this devinfo node (see i_xvdi_oestate_handler). 43 * I/O requests get from ring buffer and event coming from xenstore cannot be 44 * trusted. We verify them in xdb_get_buf() and xdb_check_state_transition(). 45 * 46 * Virtual device configuration is read/written from/to the database via 47 * xenbus_* interfaces. Driver also use xvdi_* to interact with hypervisor. 48 * There is an on-going effort to make xvdi_* cover all xenbus_*. 49 */ 50 51 #pragma ident "%Z%%M% %I% %E% SMI" 52 53 #include <sys/types.h> 54 #include <sys/conf.h> 55 #include <sys/ddi.h> 56 #include <sys/dditypes.h> 57 #include <sys/sunddi.h> 58 #include <sys/list.h> 59 #include <sys/dkio.h> 60 #include <sys/cmlb.h> 61 #include <sys/vtoc.h> 62 #include <sys/modctl.h> 63 #include <sys/bootconf.h> 64 #include <sys/promif.h> 65 #include <sys/sysmacros.h> 66 #include <public/io/xenbus.h> 67 #include <xen/sys/xenbus_impl.h> 68 #include <xen/sys/xendev.h> 69 #include <sys/gnttab.h> 70 #include <sys/scsi/generic/inquiry.h> 71 #include <vm/seg_kmem.h> 72 #include <vm/hat_i86.h> 73 #include <sys/gnttab.h> 74 #include <sys/lofi.h> 75 #include <io/xdf.h> 76 #include <xen/io/blkif_impl.h> 77 #include <io/xdb.h> 78 79 static xdb_t *xdb_statep; 80 static int xdb_debug = 0; 81 82 static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t); 83 static int xdb_get_request(xdb_t *, blkif_request_t *); 84 static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *); 85 static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *); 86 87 #ifdef DEBUG 88 /* 89 * debug aid functions 90 */ 91 92 static void 93 logva(xdb_t *vdp, uint64_t va) 94 { 95 uint64_t *page_addrs; 96 int i; 97 98 page_addrs = vdp->page_addrs; 99 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { 100 if (page_addrs[i] == va) 101 debug_enter("VA remapping found!"); 102 } 103 104 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { 105 if (page_addrs[i] == 0) { 106 page_addrs[i] = va; 107 break; 108 } 109 } 110 ASSERT(i < XDB_MAX_IO_PAGES(vdp)); 111 } 112 113 static void 114 unlogva(xdb_t *vdp, uint64_t va) 115 { 116 uint64_t *page_addrs; 117 int i; 118 119 page_addrs = vdp->page_addrs; 120 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { 121 if (page_addrs[i] == va) { 122 page_addrs[i] = 0; 123 break; 124 } 125 } 126 ASSERT(i < XDB_MAX_IO_PAGES(vdp)); 127 } 128 129 static void 130 xdb_dump_request_oe(blkif_request_t *req) 131 { 132 int i; 133 134 /* 135 * Exploit the public interface definitions for BLKIF_OP_READ 136 * etc.. 137 */ 138 char *op_name[] = { "read", "write", "barrier", "flush" }; 139 140 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation])); 141 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d", 142 req->nr_segments)); 143 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle)); 144 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu", 145 (unsigned long long)req->id)); 146 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu", 147 (unsigned long long)req->sector_number)); 148 for (i = 0; i < req->nr_segments; i++) { 149 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d," 150 "last sec=%d", req->seg[i].gref, req->seg[i].first_sect, 151 req->seg[i].last_sect)); 152 } 153 } 154 #endif /* DEBUG */ 155 156 /* 157 * Statistics. 158 */ 159 static char *xdb_stats[] = { 160 "rd_reqs", 161 "wr_reqs", 162 "br_reqs", 163 "fl_reqs", 164 "oo_reqs" 165 }; 166 167 static int 168 xdb_kstat_update(kstat_t *ksp, int flag) 169 { 170 xdb_t *vdp; 171 kstat_named_t *knp; 172 173 if (flag != KSTAT_READ) 174 return (EACCES); 175 176 vdp = ksp->ks_private; 177 knp = ksp->ks_data; 178 179 /* 180 * Assignment order should match that of the names in 181 * xdb_stats. 182 */ 183 (knp++)->value.ui64 = vdp->xs_stat_req_reads; 184 (knp++)->value.ui64 = vdp->xs_stat_req_writes; 185 (knp++)->value.ui64 = vdp->xs_stat_req_barriers; 186 (knp++)->value.ui64 = vdp->xs_stat_req_flushes; 187 (knp++)->value.ui64 = 0; /* oo_req */ 188 189 return (0); 190 } 191 192 static boolean_t 193 xdb_kstat_init(xdb_t *vdp) 194 { 195 int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]); 196 char **cp = xdb_stats; 197 kstat_named_t *knp; 198 199 if ((vdp->xs_kstats = kstat_create("xdb", 200 ddi_get_instance(vdp->xs_dip), 201 "req_statistics", "block", KSTAT_TYPE_NAMED, 202 nstat, 0)) == NULL) 203 return (B_FALSE); 204 205 vdp->xs_kstats->ks_private = vdp; 206 vdp->xs_kstats->ks_update = xdb_kstat_update; 207 208 knp = vdp->xs_kstats->ks_data; 209 while (nstat > 0) { 210 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 211 knp++; 212 cp++; 213 nstat--; 214 } 215 216 kstat_install(vdp->xs_kstats); 217 218 return (B_TRUE); 219 } 220 221 static int xdb_biodone(buf_t *); 222 223 static buf_t * 224 xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq) 225 { 226 buf_t *bp; 227 uint8_t segs, curseg; 228 int sectors; 229 int i, err; 230 gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 231 ddi_acc_handle_t acchdl; 232 233 acchdl = vdp->xs_ring_hdl; 234 bp = XDB_XREQ2BP(xreq); 235 curseg = xreq->xr_curseg; 236 /* init a new xdb request */ 237 if (req != NULL) { 238 ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); 239 boolean_t pagemapok = B_TRUE; 240 uint8_t op = ddi_get8(acchdl, &req->operation); 241 242 xreq->xr_vdp = vdp; 243 xreq->xr_op = op; 244 xreq->xr_id = ddi_get64(acchdl, &req->id); 245 segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments); 246 if (segs == 0) { 247 if (op != BLKIF_OP_FLUSH_DISKCACHE) 248 cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE" 249 " is seen from domain %d with zero " 250 "length data buffer!", vdp->xs_peer); 251 bioinit(bp); 252 bp->b_bcount = 0; 253 bp->b_lblkno = 0; 254 bp->b_un.b_addr = NULL; 255 return (bp); 256 } else if (op == BLKIF_OP_FLUSH_DISKCACHE) { 257 cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE" 258 " is seen from domain %d with non-zero " 259 "length data buffer!", vdp->xs_peer); 260 } 261 262 /* 263 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST 264 * according to the definition of blk interface by Xen 265 * we do sanity check here 266 */ 267 if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST) 268 segs = xreq->xr_buf_pages = 269 BLKIF_MAX_SEGMENTS_PER_REQUEST; 270 271 for (i = 0; i < segs; i++) { 272 uint8_t fs, ls; 273 274 mapops[i].host_addr = 275 (uint64_t)(uintptr_t)XDB_IOPAGE_VA( 276 vdp->xs_iopage_va, xreq->xr_idx, i); 277 mapops[i].dom = vdp->xs_peer; 278 mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref); 279 mapops[i].flags = GNTMAP_host_map; 280 if (op != BLKIF_OP_READ) 281 mapops[i].flags |= GNTMAP_readonly; 282 283 fs = ddi_get8(acchdl, &req->seg[i].first_sect); 284 ls = ddi_get8(acchdl, &req->seg[i].last_sect); 285 286 /* 287 * first_sect should be no bigger than last_sect and 288 * both of them should be no bigger than 289 * (PAGESIZE / XB_BSIZE - 1) according to definition 290 * of blk interface by Xen, so sanity check again 291 */ 292 if (fs > (PAGESIZE / XB_BSIZE - 1)) 293 fs = PAGESIZE / XB_BSIZE - 1; 294 if (ls > (PAGESIZE / XB_BSIZE - 1)) 295 ls = PAGESIZE / XB_BSIZE - 1; 296 if (fs > ls) 297 fs = ls; 298 299 xreq->xr_segs[i].fs = fs; 300 xreq->xr_segs[i].ls = ls; 301 } 302 303 /* map in io pages */ 304 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 305 mapops, i); 306 if (err != 0) 307 return (NULL); 308 for (i = 0; i < segs; i++) { 309 /* 310 * Although HYPERVISOR_grant_table_op() returned no 311 * error, mapping of each single page can fail. So, 312 * we have to do the check here and handle the error 313 * if needed 314 */ 315 if (mapops[i].status != GNTST_okay) { 316 int j; 317 for (j = 0; j < i; j++) { 318 #ifdef DEBUG 319 unlogva(vdp, mapops[j].host_addr); 320 #endif 321 xen_release_pfn( 322 xreq->xr_plist[j].p_pagenum); 323 } 324 pagemapok = B_FALSE; 325 break; 326 } 327 /* record page mapping handle for unmapping later */ 328 xreq->xr_page_hdls[i] = mapops[i].handle; 329 #ifdef DEBUG 330 logva(vdp, mapops[i].host_addr); 331 #endif 332 /* 333 * Pass the MFNs down using the shadow list (xr_pplist) 334 * 335 * This is pretty ugly since we have implict knowledge 336 * of how the rootnex binds buffers. 337 * The GNTTABOP_map_grant_ref op makes us do some ugly 338 * stuff since we're not allowed to touch these PTEs 339 * from the VM. 340 * 341 * Obviously, these aren't real page_t's. The rootnex 342 * only needs p_pagenum. 343 * Also, don't use btop() here or 32 bit PAE breaks. 344 */ 345 xreq->xr_pplist[i] = &xreq->xr_plist[i]; 346 xreq->xr_plist[i].p_pagenum = 347 xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT); 348 } 349 350 /* 351 * not all pages mapped in successfully, unmap those mapped-in 352 * page and return failure 353 */ 354 if (!pagemapok) { 355 gnttab_unmap_grant_ref_t unmapop; 356 357 for (i = 0; i < segs; i++) { 358 if (mapops[i].status != GNTST_okay) 359 continue; 360 unmapop.host_addr = 361 (uint64_t)(uintptr_t)XDB_IOPAGE_VA( 362 vdp->xs_iopage_va, xreq->xr_idx, i); 363 unmapop.dev_bus_addr = NULL; 364 unmapop.handle = mapops[i].handle; 365 (void) HYPERVISOR_grant_table_op( 366 GNTTABOP_unmap_grant_ref, &unmapop, 1); 367 } 368 369 return (NULL); 370 } 371 bioinit(bp); 372 bp->b_lblkno = ddi_get64(acchdl, &req->sector_number); 373 bp->b_flags = B_BUSY | B_SHADOW | B_PHYS; 374 bp->b_flags |= (ddi_get8(acchdl, &req->operation) == 375 BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC); 376 } else { 377 uint64_t blkst; 378 int isread; 379 380 /* reuse this buf */ 381 blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE; 382 isread = bp->b_flags & B_READ; 383 bioreset(bp); 384 bp->b_lblkno = blkst; 385 bp->b_flags = B_BUSY | B_SHADOW | B_PHYS; 386 bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC); 387 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!", 388 xreq->xr_idx)); 389 } 390 391 /* form a buf */ 392 bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx, 393 curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE; 394 bp->b_shadow = &xreq->xr_pplist[curseg]; 395 bp->b_iodone = xdb_biodone; 396 sectors = 0; 397 for (i = curseg; i < xreq->xr_buf_pages; i++) { 398 /* 399 * The xreq->xr_segs[i].fs of the first seg can be non-zero 400 * otherwise, we'll break it into multiple bufs 401 */ 402 if ((i != curseg) && (xreq->xr_segs[i].fs != 0)) { 403 break; 404 } 405 sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1); 406 } 407 xreq->xr_curseg = i; 408 bp->b_bcount = sectors * DEV_BSIZE; 409 bp->b_bufsize = bp->b_bcount; 410 411 return (bp); 412 } 413 414 static xdb_request_t * 415 xdb_get_req(xdb_t *vdp) 416 { 417 xdb_request_t *req; 418 int idx; 419 420 ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); 421 ASSERT(vdp->xs_free_req != -1); 422 req = &vdp->xs_req[vdp->xs_free_req]; 423 vdp->xs_free_req = req->xr_next; 424 idx = req->xr_idx; 425 bzero(req, sizeof (xdb_request_t)); 426 req->xr_idx = idx; 427 return (req); 428 } 429 430 static void 431 xdb_free_req(xdb_request_t *req) 432 { 433 xdb_t *vdp = req->xr_vdp; 434 435 ASSERT(MUTEX_HELD(&vdp->xs_iomutex)); 436 req->xr_next = vdp->xs_free_req; 437 vdp->xs_free_req = req->xr_idx; 438 } 439 440 static void 441 xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok) 442 { 443 ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; 444 445 if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id), 446 ddi_get8(acchdl, &req->operation), ok)) 447 xvdi_notify_oe(vdp->xs_dip); 448 } 449 450 static void 451 xdb_init_ioreqs(xdb_t *vdp) 452 { 453 int i; 454 455 ASSERT(vdp->xs_nentry); 456 457 if (vdp->xs_req == NULL) 458 vdp->xs_req = kmem_alloc(vdp->xs_nentry * 459 sizeof (xdb_request_t), KM_SLEEP); 460 #ifdef DEBUG 461 if (vdp->page_addrs == NULL) 462 vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) * 463 sizeof (uint64_t), KM_SLEEP); 464 #endif 465 for (i = 0; i < vdp->xs_nentry; i++) { 466 vdp->xs_req[i].xr_idx = i; 467 vdp->xs_req[i].xr_next = i + 1; 468 } 469 vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1; 470 vdp->xs_free_req = 0; 471 472 /* alloc va in host dom for io page mapping */ 473 vdp->xs_iopage_va = vmem_xalloc(heap_arena, 474 XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0, 475 VM_SLEEP); 476 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) 477 hat_prepare_mapping(kas.a_hat, 478 vdp->xs_iopage_va + i * PAGESIZE); 479 } 480 481 static void 482 xdb_uninit_ioreqs(xdb_t *vdp) 483 { 484 int i; 485 486 for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) 487 hat_release_mapping(kas.a_hat, 488 vdp->xs_iopage_va + i * PAGESIZE); 489 vmem_xfree(heap_arena, vdp->xs_iopage_va, 490 XDB_MAX_IO_PAGES(vdp) * PAGESIZE); 491 if (vdp->xs_req != NULL) { 492 kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t)); 493 vdp->xs_req = NULL; 494 } 495 #ifdef DEBUG 496 if (vdp->page_addrs != NULL) { 497 kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) * 498 sizeof (uint64_t)); 499 vdp->page_addrs = NULL; 500 } 501 #endif 502 } 503 504 static uint_t 505 xdb_intr(caddr_t arg) 506 { 507 blkif_request_t req; 508 blkif_request_t *reqp = &req; 509 xdb_request_t *xreq; 510 buf_t *bp; 511 uint8_t op; 512 xdb_t *vdp = (xdb_t *)arg; 513 int ret = DDI_INTR_UNCLAIMED; 514 dev_info_t *dip = vdp->xs_dip; 515 516 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 517 "xdb@%s: I/O request received from dom %d", 518 ddi_get_name_addr(dip), vdp->xs_peer)); 519 520 mutex_enter(&vdp->xs_iomutex); 521 522 /* shouldn't touch ring buffer if not in connected state */ 523 if (vdp->xs_if_status != XDB_CONNECTED) { 524 mutex_exit(&vdp->xs_iomutex); 525 return (DDI_INTR_UNCLAIMED); 526 } 527 528 /* 529 * We'll loop till there is no more request in the ring 530 * We won't stuck in this loop for ever since the size of ring buffer 531 * is limited, and frontend will stop pushing requests into it when 532 * the ring buffer is full 533 */ 534 535 /* req_event will be increased in xvdi_ring_get_request() */ 536 while (xdb_get_request(vdp, reqp)) { 537 ret = DDI_INTR_CLAIMED; 538 539 op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation); 540 if (op == BLKIF_OP_READ || 541 op == BLKIF_OP_WRITE || 542 op == BLKIF_OP_WRITE_BARRIER || 543 op == BLKIF_OP_FLUSH_DISKCACHE) { 544 #ifdef DEBUG 545 xdb_dump_request_oe(reqp); 546 #endif 547 xreq = xdb_get_req(vdp); 548 ASSERT(xreq); 549 switch (op) { 550 case BLKIF_OP_READ: 551 vdp->xs_stat_req_reads++; 552 break; 553 case BLKIF_OP_WRITE_BARRIER: 554 vdp->xs_stat_req_barriers++; 555 /* FALLTHRU */ 556 case BLKIF_OP_WRITE: 557 vdp->xs_stat_req_writes++; 558 break; 559 case BLKIF_OP_FLUSH_DISKCACHE: 560 vdp->xs_stat_req_flushes++; 561 break; 562 } 563 564 xreq->xr_curseg = 0; /* start from first segment */ 565 bp = xdb_get_buf(vdp, reqp, xreq); 566 if (bp == NULL) { 567 /* failed to form a buf */ 568 xdb_free_req(xreq); 569 xdb_response(vdp, reqp, B_FALSE); 570 continue; 571 } 572 bp->av_forw = NULL; 573 574 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 575 " buf %p, blkno %lld, size %lu, addr %p", 576 (void *)bp, (longlong_t)bp->b_blkno, 577 (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr)); 578 579 /* send bp to underlying blk driver */ 580 if (vdp->xs_f_iobuf == NULL) { 581 vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp; 582 } else { 583 vdp->xs_l_iobuf->av_forw = bp; 584 vdp->xs_l_iobuf = bp; 585 } 586 } else { 587 xdb_response(vdp, reqp, B_FALSE); 588 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: " 589 "Unsupported cmd received from dom %d", 590 ddi_get_name_addr(dip), vdp->xs_peer)); 591 } 592 } 593 /* notify our taskq to push buf to underlying blk driver */ 594 if (ret == DDI_INTR_CLAIMED) 595 cv_broadcast(&vdp->xs_iocv); 596 597 mutex_exit(&vdp->xs_iomutex); 598 599 return (ret); 600 } 601 602 static int 603 xdb_biodone(buf_t *bp) 604 { 605 int i, err, bioerr; 606 uint8_t segs; 607 gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 608 xdb_request_t *xreq = XDB_BP2XREQ(bp); 609 xdb_t *vdp = xreq->xr_vdp; 610 buf_t *nbp; 611 612 bioerr = geterror(bp); 613 if (bioerr) 614 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d", 615 ddi_get_name_addr(vdp->xs_dip), bioerr)); 616 617 /* check if we are done w/ this I/O request */ 618 if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) { 619 nbp = xdb_get_buf(vdp, NULL, xreq); 620 if (nbp) { 621 err = ldi_strategy(vdp->xs_ldi_hdl, nbp); 622 if (err == 0) { 623 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 624 "sent buf to backend ok")); 625 return (DDI_SUCCESS); 626 } 627 bioerr = EIO; 628 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: " 629 "sent buf to backend dev failed, err=%d", 630 ddi_get_name_addr(vdp->xs_dip), err)); 631 } else { 632 bioerr = EIO; 633 } 634 } 635 636 /* unmap io pages */ 637 segs = xreq->xr_buf_pages; 638 /* 639 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST 640 * according to the definition of blk interface by Xen 641 */ 642 ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 643 for (i = 0; i < segs; i++) { 644 unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA( 645 vdp->xs_iopage_va, xreq->xr_idx, i); 646 #ifdef DEBUG 647 mutex_enter(&vdp->xs_iomutex); 648 unlogva(vdp, unmapops[i].host_addr); 649 mutex_exit(&vdp->xs_iomutex); 650 #endif 651 unmapops[i].dev_bus_addr = NULL; 652 unmapops[i].handle = xreq->xr_page_hdls[i]; 653 } 654 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 655 unmapops, segs); 656 ASSERT(!err); 657 658 /* 659 * If we have reached a barrier write or a cache flush , then we must 660 * flush all our I/Os. 661 */ 662 if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER || 663 xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) { 664 /* 665 * XXX At this point the write did succeed, so I don't 666 * believe we should report an error because the flush 667 * failed. However, this is a debatable point, so 668 * maybe we need to think more carefully about this. 669 * For now, just cast to void. 670 */ 671 (void) ldi_ioctl(vdp->xs_ldi_hdl, 672 DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL); 673 } 674 675 mutex_enter(&vdp->xs_iomutex); 676 677 /* send response back to frontend */ 678 if (vdp->xs_if_status == XDB_CONNECTED) { 679 if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr)) 680 xvdi_notify_oe(vdp->xs_dip); 681 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 682 "sent resp back to frontend, id=%llu", 683 (unsigned long long)xreq->xr_id)); 684 } 685 /* free io resources */ 686 biofini(bp); 687 xdb_free_req(xreq); 688 689 vdp->xs_ionum--; 690 if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) { 691 /* we're closing, someone is waiting for I/O clean-up */ 692 cv_signal(&vdp->xs_ionumcv); 693 } 694 695 mutex_exit(&vdp->xs_iomutex); 696 697 return (DDI_SUCCESS); 698 } 699 700 static int 701 xdb_bindto_frontend(xdb_t *vdp) 702 { 703 int err; 704 char *oename; 705 grant_ref_t gref; 706 evtchn_port_t evtchn; 707 dev_info_t *dip = vdp->xs_dip; 708 char protocol[64] = ""; 709 710 /* 711 * Gather info from frontend 712 */ 713 oename = xvdi_get_oename(dip); 714 if (oename == NULL) 715 return (DDI_FAILURE); 716 717 err = xenbus_gather(XBT_NULL, oename, 718 "ring-ref", "%lu", &gref, "event-channel", "%u", &evtchn, NULL); 719 if (err != 0) { 720 xvdi_fatal_error(dip, err, 721 "Getting ring-ref and evtchn from frontend"); 722 return (DDI_FAILURE); 723 } 724 725 vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE; 726 vdp->xs_nentry = BLKIF_RING_SIZE; 727 vdp->xs_entrysize = sizeof (union blkif_sring_entry); 728 729 err = xenbus_gather(XBT_NULL, oename, 730 "protocol", "%63s", protocol, NULL); 731 if (err) 732 (void) strcpy(protocol, "unspecified, assuming native"); 733 else { 734 /* 735 * We must check for NATIVE first, so that the fast path 736 * is taken for copying data from the guest to the host. 737 */ 738 if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) { 739 if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { 740 vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32; 741 vdp->xs_nentry = BLKIF_X86_32_RING_SIZE; 742 vdp->xs_entrysize = 743 sizeof (union blkif_x86_32_sring_entry); 744 } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) == 745 0) { 746 vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64; 747 vdp->xs_nentry = BLKIF_X86_64_RING_SIZE; 748 vdp->xs_entrysize = 749 sizeof (union blkif_x86_64_sring_entry); 750 } else { 751 xvdi_fatal_error(dip, err, "unknown protocol"); 752 return (DDI_FAILURE); 753 } 754 } 755 } 756 #ifdef DEBUG 757 cmn_err(CE_NOTE, "xdb@%s: blkif protocol '%s' ", 758 ddi_get_name_addr(dip), protocol); 759 #endif 760 761 /* 762 * map and init ring 763 * 764 * The ring parameters must match those which have been allocated 765 * in the front end. 766 */ 767 err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize, 768 gref, &vdp->xs_ring); 769 if (err != DDI_SUCCESS) 770 return (DDI_FAILURE); 771 /* 772 * This will be removed after we use shadow I/O ring request since 773 * we don't need to access the ring itself directly, thus the access 774 * handle is not needed 775 */ 776 vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl; 777 778 /* 779 * bind event channel 780 */ 781 err = xvdi_bind_evtchn(dip, evtchn); 782 if (err != DDI_SUCCESS) { 783 xvdi_unmap_ring(vdp->xs_ring); 784 return (DDI_FAILURE); 785 } 786 787 return (DDI_SUCCESS); 788 } 789 790 static void 791 xdb_unbindfrom_frontend(xdb_t *vdp) 792 { 793 xvdi_free_evtchn(vdp->xs_dip); 794 xvdi_unmap_ring(vdp->xs_ring); 795 } 796 797 #define LOFI_CTRL_NODE "/dev/lofictl" 798 #define LOFI_DEV_NODE "/devices/pseudo/lofi@0:" 799 #define LOFI_MODE FREAD | FWRITE | FEXCL 800 801 static int 802 xdb_setup_node(xdb_t *vdp, char *path) 803 { 804 dev_info_t *dip; 805 char *xsnode, *node; 806 ldi_handle_t ldi_hdl; 807 struct lofi_ioctl *li; 808 int minor; 809 int err; 810 unsigned int len; 811 812 dip = vdp->xs_dip; 813 xsnode = xvdi_get_xsname(dip); 814 if (xsnode == NULL) 815 return (DDI_FAILURE); 816 817 err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, &len); 818 if (err != 0) { 819 xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'"); 820 return (DDI_FAILURE); 821 } 822 823 if (!XDB_IS_LOFI(vdp)) { 824 (void) strlcpy(path, node, MAXPATHLEN + 1); 825 kmem_free(node, len); 826 return (DDI_SUCCESS); 827 } 828 829 do { 830 err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, 831 &ldi_hdl, vdp->xs_ldi_li); 832 } while (err == EBUSY); 833 if (err != 0) { 834 kmem_free(node, len); 835 return (DDI_FAILURE); 836 } 837 838 li = kmem_zalloc(sizeof (*li), KM_SLEEP); 839 (void) strlcpy(li->li_filename, node, MAXPATHLEN + 1); 840 kmem_free(node, len); 841 if (ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, 842 LOFI_MODE | FKIOCTL, kcred, &minor) != 0) { 843 cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s", 844 ddi_get_name_addr(dip), li->li_filename); 845 (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); 846 kmem_free(li, sizeof (*li)); 847 return (DDI_FAILURE); 848 } 849 /* 850 * return '/devices/...' instead of '/dev/lofi/...' since the 851 * former is available immediately after calling ldi_ioctl 852 */ 853 (void) snprintf(path, MAXPATHLEN + 1, LOFI_DEV_NODE "%d", minor); 854 (void) xenbus_printf(XBT_NULL, xsnode, "node", "%s", path); 855 (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); 856 kmem_free(li, sizeof (*li)); 857 return (DDI_SUCCESS); 858 } 859 860 static void 861 xdb_teardown_node(xdb_t *vdp) 862 { 863 dev_info_t *dip; 864 char *xsnode, *node; 865 ldi_handle_t ldi_hdl; 866 struct lofi_ioctl *li; 867 int err; 868 unsigned int len; 869 870 if (!XDB_IS_LOFI(vdp)) 871 return; 872 873 dip = vdp->xs_dip; 874 xsnode = xvdi_get_xsname(dip); 875 if (xsnode == NULL) 876 return; 877 878 err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, &len); 879 if (err != 0) { 880 xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'"); 881 return; 882 } 883 884 li = kmem_zalloc(sizeof (*li), KM_SLEEP); 885 (void) strlcpy(li->li_filename, node, MAXPATHLEN + 1); 886 kmem_free(node, len); 887 888 do { 889 err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, 890 &ldi_hdl, vdp->xs_ldi_li); 891 } while (err == EBUSY); 892 893 if (err != 0) { 894 kmem_free(li, sizeof (*li)); 895 return; 896 } 897 898 if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li, 899 LOFI_MODE | FKIOCTL, kcred, NULL) != 0) { 900 cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s", 901 ddi_get_name_addr(dip), li->li_filename); 902 } 903 904 (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); 905 kmem_free(li, sizeof (*li)); 906 } 907 908 static int 909 xdb_open_device(xdb_t *vdp) 910 { 911 uint64_t devsize; 912 dev_info_t *dip; 913 char *xsnode; 914 char *nodepath; 915 char *mode = NULL; 916 char *type = NULL; 917 int err; 918 919 dip = vdp->xs_dip; 920 xsnode = xvdi_get_xsname(dip); 921 if (xsnode == NULL) 922 return (DDI_FAILURE); 923 924 err = xenbus_gather(XBT_NULL, xsnode, 925 "mode", NULL, &mode, "type", NULL, &type, NULL); 926 if (err != 0) { 927 if (mode) 928 kmem_free(mode, strlen(mode) + 1); 929 if (type) 930 kmem_free(type, strlen(type) + 1); 931 xvdi_fatal_error(dip, err, 932 "Getting mode and type from backend device"); 933 return (DDI_FAILURE); 934 } 935 if (strcmp(type, "file") == 0) { 936 vdp->xs_type |= XDB_DEV_LOFI; 937 } 938 kmem_free(type, strlen(type) + 1); 939 if ((strcmp(mode, "r") == NULL) || (strcmp(mode, "ro") == NULL)) { 940 vdp->xs_type |= XDB_DEV_RO; 941 } 942 kmem_free(mode, strlen(mode) + 1); 943 944 /* 945 * try to open backend device 946 */ 947 if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0) 948 return (DDI_FAILURE); 949 950 nodepath = kmem_zalloc(MAXPATHLEN + 1, KM_SLEEP); 951 err = xdb_setup_node(vdp, nodepath); 952 if (err != DDI_SUCCESS) { 953 xvdi_fatal_error(dip, err, 954 "Getting device path of backend device"); 955 ldi_ident_release(vdp->xs_ldi_li); 956 kmem_free(nodepath, MAXPATHLEN + 1); 957 return (DDI_FAILURE); 958 } 959 960 if (ldi_open_by_name(nodepath, 961 FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), 962 kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) { 963 xdb_teardown_node(vdp); 964 ldi_ident_release(vdp->xs_ldi_li); 965 cmn_err(CE_WARN, "xdb@%s: Failed to open: %s", 966 ddi_get_name_addr(dip), nodepath); 967 kmem_free(nodepath, MAXPATHLEN + 1); 968 return (DDI_FAILURE); 969 } 970 971 /* check if it's a CD/DVD disc */ 972 if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS, 973 "inquiry-device-type", DTYPE_DIRECT) == DTYPE_RODIRECT) 974 vdp->xs_type |= XDB_DEV_CD; 975 /* check if it's a removable disk */ 976 if (ldi_prop_exists(vdp->xs_ldi_hdl, 977 LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 978 "removable-media")) 979 vdp->xs_type |= XDB_DEV_RMB; 980 981 if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) { 982 (void) ldi_close(vdp->xs_ldi_hdl, 983 FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred); 984 xdb_teardown_node(vdp); 985 ldi_ident_release(vdp->xs_ldi_li); 986 kmem_free(nodepath, MAXPATHLEN + 1); 987 return (DDI_FAILURE); 988 } 989 vdp->xs_sectors = devsize / XB_BSIZE; 990 991 kmem_free(nodepath, MAXPATHLEN + 1); 992 return (DDI_SUCCESS); 993 } 994 995 static void 996 xdb_close_device(xdb_t *vdp) 997 { 998 (void) ldi_close(vdp->xs_ldi_hdl, 999 FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred); 1000 xdb_teardown_node(vdp); 1001 ldi_ident_release(vdp->xs_ldi_li); 1002 vdp->xs_ldi_li = NULL; 1003 vdp->xs_ldi_hdl = NULL; 1004 } 1005 1006 /* 1007 * Kick-off connect process 1008 * If xs_fe_status == XDB_FE_READY and xs_dev_status == XDB_DEV_READY 1009 * the xs_if_status will be changed to XDB_CONNECTED on success, 1010 * otherwise, xs_if_status will not be changed 1011 */ 1012 static int 1013 xdb_start_connect(xdb_t *vdp) 1014 { 1015 uint32_t dinfo; 1016 xenbus_transaction_t xbt; 1017 int err, svdst; 1018 char *xsnode; 1019 dev_info_t *dip = vdp->xs_dip; 1020 char *barrier; 1021 uint_t len; 1022 1023 /* 1024 * Start connect to frontend only when backend device are ready 1025 * and frontend has moved to XenbusStateInitialised, which means 1026 * ready to connect 1027 */ 1028 ASSERT((vdp->xs_fe_status == XDB_FE_READY) && 1029 (vdp->xs_dev_status == XDB_DEV_READY)); 1030 1031 if (((xsnode = xvdi_get_xsname(dip)) == NULL) || 1032 ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1) || 1033 (xdb_open_device(vdp) != DDI_SUCCESS)) 1034 return (DDI_FAILURE); 1035 1036 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised); 1037 1038 if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) 1039 goto errout1; 1040 1041 /* init i/o requests */ 1042 xdb_init_ioreqs(vdp); 1043 1044 if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp) 1045 != DDI_SUCCESS) 1046 goto errout2; 1047 1048 /* 1049 * we can recieve intr any time from now on 1050 * mark that we're ready to take intr 1051 */ 1052 mutex_enter(&vdp->xs_iomutex); 1053 /* 1054 * save it in case we need to restore when we 1055 * fail to write xenstore later 1056 */ 1057 svdst = vdp->xs_if_status; 1058 vdp->xs_if_status = XDB_CONNECTED; 1059 mutex_exit(&vdp->xs_iomutex); 1060 1061 /* write into xenstore the info needed by frontend */ 1062 trans_retry: 1063 if (xenbus_transaction_start(&xbt)) { 1064 xvdi_fatal_error(dip, EIO, "transaction start"); 1065 goto errout3; 1066 } 1067 1068 /* 1069 * If feature-barrier isn't present in xenstore, add it. 1070 */ 1071 if (xenbus_read(xbt, xsnode, "feature-barrier", 1072 (void **)&barrier, &len) != 0) { 1073 if ((err = xenbus_printf(xbt, xsnode, "feature-barrier", 1074 "%d", 1)) != 0) { 1075 cmn_err(CE_WARN, "xdb@%s: failed to write " 1076 "'feature-barrier'", ddi_get_name_addr(dip)); 1077 xvdi_fatal_error(dip, err, "writing 'feature-barrier'"); 1078 goto abort_trans; 1079 } 1080 } else 1081 kmem_free(barrier, len); 1082 1083 dinfo = 0; 1084 if (XDB_IS_RO(vdp)) 1085 dinfo |= VDISK_READONLY; 1086 if (XDB_IS_CD(vdp)) 1087 dinfo |= VDISK_CDROM; 1088 if (XDB_IS_RMB(vdp)) 1089 dinfo |= VDISK_REMOVABLE; 1090 if (err = xenbus_printf(xbt, xsnode, "info", "%u", dinfo)) { 1091 xvdi_fatal_error(dip, err, "writing 'info'"); 1092 goto abort_trans; 1093 } 1094 1095 /* hard-coded 512-byte sector size */ 1096 if (err = xenbus_printf(xbt, xsnode, "sector-size", "%u", DEV_BSIZE)) { 1097 xvdi_fatal_error(dip, err, "writing 'sector-size'"); 1098 goto abort_trans; 1099 } 1100 1101 if (err = xenbus_printf(xbt, xsnode, "sectors", "%"PRIu64, 1102 vdp->xs_sectors)) { 1103 xvdi_fatal_error(dip, err, "writing 'sectors'"); 1104 goto abort_trans; 1105 } 1106 1107 if (err = xenbus_printf(xbt, xsnode, "instance", "%d", 1108 ddi_get_instance(dip))) { 1109 xvdi_fatal_error(dip, err, "writing 'instance'"); 1110 goto abort_trans; 1111 } 1112 1113 if ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0) { 1114 xvdi_fatal_error(dip, err, "writing 'state'"); 1115 goto abort_trans; 1116 } 1117 1118 if (err = xenbus_transaction_end(xbt, 0)) { 1119 if (err == EAGAIN) 1120 /* transaction is ended, don't need to abort it */ 1121 goto trans_retry; 1122 xvdi_fatal_error(dip, err, "completing transaction"); 1123 goto errout3; 1124 } 1125 1126 return (DDI_SUCCESS); 1127 1128 abort_trans: 1129 (void) xenbus_transaction_end(xbt, 1); 1130 errout3: 1131 mutex_enter(&vdp->xs_iomutex); 1132 vdp->xs_if_status = svdst; 1133 mutex_exit(&vdp->xs_iomutex); 1134 ddi_remove_intr(dip, 0, NULL); 1135 errout2: 1136 xdb_uninit_ioreqs(vdp); 1137 xdb_unbindfrom_frontend(vdp); 1138 errout1: 1139 xdb_close_device(vdp); 1140 return (DDI_FAILURE); 1141 } 1142 1143 /* 1144 * Kick-off disconnect process 1145 * xs_if_status will not be changed 1146 */ 1147 static int 1148 xdb_start_disconnect(xdb_t *vdp) 1149 { 1150 /* 1151 * Kick-off disconnect process 1152 */ 1153 if (xvdi_switch_state(vdp->xs_dip, XBT_NULL, XenbusStateClosing) > 0) 1154 return (DDI_FAILURE); 1155 1156 return (DDI_SUCCESS); 1157 } 1158 1159 /* 1160 * Disconnect from frontend and close backend device 1161 * ifstatus will be changed to XDB_DISCONNECTED 1162 * Xenbus state will be changed to XenbusStateClosed 1163 */ 1164 static void 1165 xdb_close(dev_info_t *dip) 1166 { 1167 xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); 1168 1169 ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); 1170 1171 mutex_enter(&vdp->xs_iomutex); 1172 1173 if (vdp->xs_if_status != XDB_CONNECTED) { 1174 vdp->xs_if_status = XDB_DISCONNECTED; 1175 cv_broadcast(&vdp->xs_iocv); 1176 mutex_exit(&vdp->xs_iomutex); 1177 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1178 return; 1179 } 1180 vdp->xs_if_status = XDB_DISCONNECTED; 1181 cv_broadcast(&vdp->xs_iocv); 1182 1183 mutex_exit(&vdp->xs_iomutex); 1184 1185 /* stop accepting I/O request from frontend */ 1186 ddi_remove_intr(dip, 0, NULL); 1187 /* clear all on-going I/Os, if any */ 1188 mutex_enter(&vdp->xs_iomutex); 1189 while (vdp->xs_ionum > 0) 1190 cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex); 1191 mutex_exit(&vdp->xs_iomutex); 1192 1193 /* clean up resources and close this interface */ 1194 xdb_uninit_ioreqs(vdp); 1195 xdb_unbindfrom_frontend(vdp); 1196 xdb_close_device(vdp); 1197 vdp->xs_peer = (domid_t)-1; 1198 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1199 } 1200 1201 /* 1202 * Xdb_check_state_transition will check the XenbusState change to see 1203 * if the change is a valid transition or not. 1204 * The new state is written by frontend domain, or by running xenstore-write 1205 * to change it manually in dom0 1206 */ 1207 static int 1208 xdb_check_state_transition(xdb_t *vdp, XenbusState oestate) 1209 { 1210 enum xdb_state status; 1211 int stcheck; 1212 #define STOK 0 /* need further process */ 1213 #define STNOP 1 /* no action need taking */ 1214 #define STBUG 2 /* unexpected state change, could be a bug */ 1215 1216 status = vdp->xs_if_status; 1217 stcheck = STOK; 1218 1219 switch (status) { 1220 case XDB_UNKNOWN: 1221 if (vdp->xs_fe_status == XDB_FE_UNKNOWN) { 1222 if ((oestate == XenbusStateUnknown) || 1223 (oestate == XenbusStateConnected)) 1224 stcheck = STBUG; 1225 else if ((oestate == XenbusStateInitialising) || 1226 (oestate == XenbusStateInitWait)) 1227 stcheck = STNOP; 1228 } else { 1229 if ((oestate == XenbusStateUnknown) || 1230 (oestate == XenbusStateInitialising) || 1231 (oestate == XenbusStateInitWait) || 1232 (oestate == XenbusStateConnected)) 1233 stcheck = STBUG; 1234 else if (oestate == XenbusStateInitialised) 1235 stcheck = STNOP; 1236 } 1237 break; 1238 case XDB_CONNECTED: 1239 if ((oestate == XenbusStateUnknown) || 1240 (oestate == XenbusStateInitialising) || 1241 (oestate == XenbusStateInitWait) || 1242 (oestate == XenbusStateInitialised)) 1243 stcheck = STBUG; 1244 else if (oestate == XenbusStateConnected) 1245 stcheck = STNOP; 1246 break; 1247 case XDB_DISCONNECTED: 1248 default: 1249 stcheck = STBUG; 1250 } 1251 1252 if (stcheck == STOK) 1253 return (DDI_SUCCESS); 1254 1255 if (stcheck == STBUG) 1256 cmn_err(CE_NOTE, "xdb@%s: unexpected otherend " 1257 "state change to %d!, when status is %d", 1258 ddi_get_name_addr(vdp->xs_dip), oestate, status); 1259 1260 return (DDI_FAILURE); 1261 } 1262 1263 static void 1264 xdb_send_buf(void *arg) 1265 { 1266 buf_t *bp; 1267 xdb_t *vdp = (xdb_t *)arg; 1268 1269 mutex_enter(&vdp->xs_iomutex); 1270 1271 while (vdp->xs_if_status != XDB_DISCONNECTED) { 1272 while ((bp = vdp->xs_f_iobuf) != NULL) { 1273 vdp->xs_f_iobuf = bp->av_forw; 1274 bp->av_forw = NULL; 1275 vdp->xs_ionum++; 1276 mutex_exit(&vdp->xs_iomutex); 1277 if (bp->b_bcount != 0) { 1278 int err = ldi_strategy(vdp->xs_ldi_hdl, bp); 1279 if (err != 0) { 1280 bp->b_flags |= B_ERROR; 1281 (void) xdb_biodone(bp); 1282 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, 1283 "xdb@%s: sent buf to backend dev" 1284 "failed, err=%d", 1285 ddi_get_name_addr(vdp->xs_dip), 1286 err)); 1287 } else { 1288 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, 1289 "sent buf to backend ok")); 1290 } 1291 } else /* no I/O need to be done */ 1292 (void) xdb_biodone(bp); 1293 1294 mutex_enter(&vdp->xs_iomutex); 1295 } 1296 1297 if (vdp->xs_if_status != XDB_DISCONNECTED) 1298 cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex); 1299 } 1300 1301 mutex_exit(&vdp->xs_iomutex); 1302 } 1303 1304 /*ARGSUSED*/ 1305 static void 1306 xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, 1307 void *impl_data) 1308 { 1309 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1310 xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); 1311 1312 XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " 1313 "hotplug status change to %d!", ddi_get_name_addr(dip), state)); 1314 1315 mutex_enter(&vdp->xs_cbmutex); 1316 if (state == Connected) { 1317 /* Hotplug script has completed successfully */ 1318 if (vdp->xs_dev_status == XDB_DEV_UNKNOWN) { 1319 vdp->xs_dev_status = XDB_DEV_READY; 1320 if (vdp->xs_fe_status == XDB_FE_READY) 1321 /* try to connect to frontend */ 1322 if (xdb_start_connect(vdp) != DDI_SUCCESS) 1323 (void) xdb_start_disconnect(vdp); 1324 } 1325 } 1326 mutex_exit(&vdp->xs_cbmutex); 1327 } 1328 1329 /*ARGSUSED*/ 1330 static void 1331 xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, 1332 void *impl_data) 1333 { 1334 XenbusState new_state = *(XenbusState *)impl_data; 1335 xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); 1336 1337 XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " 1338 "otherend state change to %d!", ddi_get_name_addr(dip), new_state)); 1339 1340 mutex_enter(&vdp->xs_cbmutex); 1341 1342 if (xdb_check_state_transition(vdp, new_state) == DDI_FAILURE) { 1343 mutex_exit(&vdp->xs_cbmutex); 1344 return; 1345 } 1346 1347 switch (new_state) { 1348 case XenbusStateInitialised: 1349 ASSERT(vdp->xs_if_status == XDB_UNKNOWN); 1350 1351 /* frontend is ready for connecting */ 1352 vdp->xs_fe_status = XDB_FE_READY; 1353 1354 if (vdp->xs_dev_status == XDB_DEV_READY) 1355 if (xdb_start_connect(vdp) != DDI_SUCCESS) 1356 (void) xdb_start_disconnect(vdp); 1357 break; 1358 case XenbusStateClosing: 1359 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); 1360 break; 1361 case XenbusStateClosed: 1362 /* clean up */ 1363 xdb_close(dip); 1364 } 1365 1366 mutex_exit(&vdp->xs_cbmutex); 1367 } 1368 1369 static int 1370 xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1371 { 1372 xdb_t *vdp; 1373 ddi_iblock_cookie_t ibc; 1374 int instance; 1375 1376 switch (cmd) { 1377 case DDI_RESUME: 1378 return (DDI_FAILURE); 1379 case DDI_ATTACH: 1380 break; 1381 default: 1382 return (DDI_FAILURE); 1383 } 1384 1385 /* DDI_ATTACH */ 1386 instance = ddi_get_instance(dip); 1387 if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS) 1388 return (DDI_FAILURE); 1389 1390 vdp = ddi_get_soft_state(xdb_statep, instance); 1391 vdp->xs_dip = dip; 1392 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) 1393 goto errout1; 1394 1395 if (!xdb_kstat_init(vdp)) 1396 goto errout1; 1397 1398 mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc); 1399 mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc); 1400 cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL); 1401 cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL); 1402 1403 ddi_set_driver_private(dip, vdp); 1404 1405 vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1, 1406 TASKQ_DEFAULTPRI, 0); 1407 if (vdp->xs_iotaskq == NULL) 1408 goto errout2; 1409 (void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp, 1410 DDI_SLEEP); 1411 1412 /* Watch frontend and hotplug state change */ 1413 if (xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change) != 1414 DDI_SUCCESS) 1415 goto errout3; 1416 if (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change) != 1417 DDI_SUCCESS) { 1418 goto errout4; 1419 } 1420 1421 /* 1422 * Kick-off hotplug script 1423 */ 1424 if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) { 1425 cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script", 1426 ddi_get_name_addr(dip)); 1427 goto errout4; 1428 } 1429 1430 /* 1431 * start waiting for hotplug event and otherend state event 1432 * mainly for debugging, frontend will not take any op seeing this 1433 */ 1434 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 1435 1436 XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!", 1437 ddi_get_name_addr(dip))); 1438 return (DDI_SUCCESS); 1439 1440 errout4: 1441 xvdi_remove_event_handler(dip, NULL); 1442 errout3: 1443 mutex_enter(&vdp->xs_cbmutex); 1444 mutex_enter(&vdp->xs_iomutex); 1445 vdp->xs_if_status = XDB_DISCONNECTED; 1446 cv_broadcast(&vdp->xs_iocv); 1447 mutex_exit(&vdp->xs_iomutex); 1448 mutex_exit(&vdp->xs_cbmutex); 1449 ddi_taskq_destroy(vdp->xs_iotaskq); 1450 errout2: 1451 ddi_set_driver_private(dip, NULL); 1452 cv_destroy(&vdp->xs_iocv); 1453 cv_destroy(&vdp->xs_ionumcv); 1454 mutex_destroy(&vdp->xs_cbmutex); 1455 mutex_destroy(&vdp->xs_iomutex); 1456 kstat_delete(vdp->xs_kstats); 1457 errout1: 1458 ddi_soft_state_free(xdb_statep, instance); 1459 return (DDI_FAILURE); 1460 } 1461 1462 /*ARGSUSED*/ 1463 static int 1464 xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1465 { 1466 int instance = ddi_get_instance(dip); 1467 xdb_t *vdp = XDB_INST2SOFTS(instance); 1468 1469 switch (cmd) { 1470 case DDI_SUSPEND: 1471 return (DDI_FAILURE); 1472 case DDI_DETACH: 1473 break; 1474 default: 1475 return (DDI_FAILURE); 1476 } 1477 1478 /* DDI_DETACH handling */ 1479 1480 /* shouldn't detach, if still used by frontend */ 1481 mutex_enter(&vdp->xs_iomutex); 1482 if (vdp->xs_if_status != XDB_DISCONNECTED) { 1483 mutex_exit(&vdp->xs_iomutex); 1484 return (DDI_FAILURE); 1485 } 1486 mutex_exit(&vdp->xs_iomutex); 1487 1488 xvdi_remove_event_handler(dip, NULL); 1489 /* can do nothing about it, if it fails */ 1490 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1491 1492 ddi_taskq_destroy(vdp->xs_iotaskq); 1493 cv_destroy(&vdp->xs_iocv); 1494 cv_destroy(&vdp->xs_ionumcv); 1495 mutex_destroy(&vdp->xs_cbmutex); 1496 mutex_destroy(&vdp->xs_iomutex); 1497 kstat_delete(vdp->xs_kstats); 1498 ddi_set_driver_private(dip, NULL); 1499 ddi_soft_state_free(xdb_statep, instance); 1500 1501 XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!", 1502 ddi_get_name_addr(dip))); 1503 return (DDI_SUCCESS); 1504 } 1505 1506 static struct dev_ops xdb_dev_ops = { 1507 DEVO_REV, /* devo_rev */ 1508 0, /* devo_refcnt */ 1509 ddi_getinfo_1to1, /* devo_getinfo */ 1510 nulldev, /* devo_identify */ 1511 nulldev, /* devo_probe */ 1512 xdb_attach, /* devo_attach */ 1513 xdb_detach, /* devo_detach */ 1514 nodev, /* devo_reset */ 1515 NULL, /* devo_cb_ops */ 1516 NULL, /* devo_bus_ops */ 1517 NULL /* power */ 1518 }; 1519 1520 /* 1521 * Module linkage information for the kernel. 1522 */ 1523 static struct modldrv modldrv = { 1524 &mod_driverops, /* Type of module. */ 1525 "vbd backend driver 1.4", /* Name of the module */ 1526 &xdb_dev_ops /* driver ops */ 1527 }; 1528 1529 static struct modlinkage xdb_modlinkage = { 1530 MODREV_1, 1531 &modldrv, 1532 NULL 1533 }; 1534 1535 int 1536 _init(void) 1537 { 1538 int rv; 1539 1540 if ((rv = ddi_soft_state_init((void **)&xdb_statep, 1541 sizeof (xdb_t), 0)) == 0) 1542 if ((rv = mod_install(&xdb_modlinkage)) != 0) 1543 ddi_soft_state_fini((void **)&xdb_statep); 1544 return (rv); 1545 } 1546 1547 int 1548 _fini(void) 1549 { 1550 int rv; 1551 1552 if ((rv = mod_remove(&xdb_modlinkage)) != 0) 1553 return (rv); 1554 ddi_soft_state_fini((void **)&xdb_statep); 1555 return (rv); 1556 } 1557 1558 int 1559 _info(struct modinfo *modinfop) 1560 { 1561 return (mod_info(&xdb_modlinkage, modinfop)); 1562 } 1563 1564 static int 1565 xdb_get_request(xdb_t *vdp, blkif_request_t *req) 1566 { 1567 void *src = xvdi_ring_get_request(vdp->xs_ring); 1568 1569 if (src == NULL) 1570 return (0); 1571 1572 switch (vdp->xs_blk_protocol) { 1573 case BLKIF_PROTOCOL_NATIVE: 1574 (void) memcpy(req, src, sizeof (*req)); 1575 break; 1576 case BLKIF_PROTOCOL_X86_32: 1577 blkif_get_x86_32_req(req, src); 1578 break; 1579 case BLKIF_PROTOCOL_X86_64: 1580 blkif_get_x86_64_req(req, src); 1581 break; 1582 default: 1583 cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d", 1584 ddi_get_name_addr(vdp->xs_dip), 1585 vdp->xs_blk_protocol); 1586 } 1587 return (1); 1588 } 1589 1590 static int 1591 xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status) 1592 { 1593 ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; 1594 blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring); 1595 blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp; 1596 blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp; 1597 1598 ASSERT(rsp); 1599 1600 switch (vdp->xs_blk_protocol) { 1601 case BLKIF_PROTOCOL_NATIVE: 1602 ddi_put64(acchdl, &rsp->id, id); 1603 ddi_put8(acchdl, &rsp->operation, op); 1604 ddi_put16(acchdl, (uint16_t *)&rsp->status, 1605 status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); 1606 break; 1607 case BLKIF_PROTOCOL_X86_32: 1608 ddi_put64(acchdl, &rsp_32->id, id); 1609 ddi_put8(acchdl, &rsp_32->operation, op); 1610 ddi_put16(acchdl, (uint16_t *)&rsp_32->status, 1611 status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); 1612 break; 1613 case BLKIF_PROTOCOL_X86_64: 1614 ddi_put64(acchdl, &rsp_64->id, id); 1615 ddi_put8(acchdl, &rsp_64->operation, op); 1616 ddi_put16(acchdl, (uint16_t *)&rsp_64->status, 1617 status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); 1618 break; 1619 default: 1620 cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d", 1621 ddi_get_name_addr(vdp->xs_dip), 1622 vdp->xs_blk_protocol); 1623 } 1624 1625 return (xvdi_ring_push_response(vdp->xs_ring)); 1626 } 1627 1628 static void 1629 blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) 1630 { 1631 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1632 dst->operation = src->operation; 1633 dst->nr_segments = src->nr_segments; 1634 dst->handle = src->handle; 1635 dst->id = src->id; 1636 dst->sector_number = src->sector_number; 1637 if (n > src->nr_segments) 1638 n = src->nr_segments; 1639 for (i = 0; i < n; i++) 1640 dst->seg[i] = src->seg[i]; 1641 } 1642 1643 static void 1644 blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) 1645 { 1646 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1647 dst->operation = src->operation; 1648 dst->nr_segments = src->nr_segments; 1649 dst->handle = src->handle; 1650 dst->id = src->id; 1651 dst->sector_number = src->sector_number; 1652 if (n > src->nr_segments) 1653 n = src->nr_segments; 1654 for (i = 0; i < n; i++) 1655 dst->seg[i] = src->seg[i]; 1656 } 1657