1 /* 2 * XenBSD block device driver 3 * 4 * Copyright (c) 2009 Scott Long, Yahoo! 5 * Copyright (c) 2009 Frank Suchomel, Citrix 6 * Copyright (c) 2009 Doug F. Rabson, Citrix 7 * Copyright (c) 2005 Kip Macy 8 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 9 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 10 * 11 * 12 * Permission is hereby granted, free of charge, to any person obtaining a copy 13 * of this software and associated documentation files (the "Software"), to 14 * deal in the Software without restriction, including without limitation the 15 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 16 * sell copies of the Software, and to permit persons to whom the Software is 17 * furnished to do so, subject to the following conditions: 18 * 19 * The above copyright notice and this permission notice shall be included in 20 * all copies or substantial portions of the Software. 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 27 * DEALINGS IN THE SOFTWARE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/kernel.h> 37 #include <vm/vm.h> 38 #include <vm/pmap.h> 39 40 #include <sys/bio.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <sys/module.h> 44 45 #include <machine/bus.h> 46 #include <sys/rman.h> 47 #include <machine/resource.h> 48 #include <machine/intr_machdep.h> 49 #include <machine/vmparam.h> 50 #include <sys/bus_dma.h> 51 52 #include <machine/xen/xen-os.h> 53 #include <machine/xen/xenfunc.h> 54 #include <xen/hypervisor.h> 55 #include <xen/xen_intr.h> 56 #include <xen/evtchn.h> 57 #include <xen/gnttab.h> 58 #include <xen/interface/grant_table.h> 59 #include <xen/interface/io/protocols.h> 60 #include <xen/xenbus/xenbusvar.h> 61 62 #include <geom/geom_disk.h> 63 64 #include <dev/xen/blkfront/block.h> 65 66 #include "xenbus_if.h" 67 68 /* prototypes */ 69 static void xb_free_command(struct xb_command *cm); 70 static void xb_startio(struct xb_softc *sc); 71 static void connect(struct xb_softc *); 72 static void blkfront_closing(device_t); 73 static int blkfront_detach(device_t); 74 static int talk_to_backend(struct xb_softc *); 75 static int setup_blkring(struct xb_softc *); 76 static void blkif_int(void *); 77 static void blkif_recover(struct xb_softc *); 78 static void blkif_completion(struct xb_command *); 79 static void blkif_free(struct xb_softc *, int); 80 static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int); 81 82 #define GRANT_INVALID_REF 0 83 84 /* Control whether runtime update of vbds is enabled. */ 85 #define ENABLE_VBD_UPDATE 0 86 87 #if ENABLE_VBD_UPDATE 88 static void vbd_update(void); 89 #endif 90 91 #define BLKIF_STATE_DISCONNECTED 0 92 #define BLKIF_STATE_CONNECTED 1 93 #define BLKIF_STATE_SUSPENDED 2 94 95 #ifdef notyet 96 static char *blkif_state_name[] = { 97 [BLKIF_STATE_DISCONNECTED] = "disconnected", 98 [BLKIF_STATE_CONNECTED] = "connected", 99 [BLKIF_STATE_SUSPENDED] = "closed", 100 }; 101 102 static char * blkif_status_name[] = { 103 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 104 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 105 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 106 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 107 }; 108 #endif 109 110 #if 0 111 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) 112 #else 113 #define DPRINTK(fmt, args...) 114 #endif 115 116 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 117 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 118 119 #define BLKIF_MAXIO (32 * 1024) 120 121 static int blkif_open(struct disk *dp); 122 static int blkif_close(struct disk *dp); 123 static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); 124 static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm); 125 static void xb_strategy(struct bio *bp); 126 127 // In order to quiesce the device during kernel dumps, outstanding requests to 128 // DOM0 for disk reads/writes need to be accounted for. 129 static int xb_dump(void *, void *, vm_offset_t, off_t, size_t); 130 131 /* XXX move to xb_vbd.c when VBD update support is added */ 132 #define MAX_VBDS 64 133 134 #define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ 135 #define XBD_SECTOR_SHFT 9 136 137 /* 138 * Translate Linux major/minor to an appropriate name and unit 139 * number. For HVM guests, this allows us to use the same drive names 140 * with blkfront as the emulated drives, easing transition slightly. 141 */ 142 static void 143 blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name) 144 { 145 static struct vdev_info { 146 int major; 147 int shift; 148 int base; 149 const char *name; 150 } info[] = { 151 {3, 6, 0, "ad"}, /* ide0 */ 152 {22, 6, 2, "ad"}, /* ide1 */ 153 {33, 6, 4, "ad"}, /* ide2 */ 154 {34, 6, 6, "ad"}, /* ide3 */ 155 {56, 6, 8, "ad"}, /* ide4 */ 156 {57, 6, 10, "ad"}, /* ide5 */ 157 {88, 6, 12, "ad"}, /* ide6 */ 158 {89, 6, 14, "ad"}, /* ide7 */ 159 {90, 6, 16, "ad"}, /* ide8 */ 160 {91, 6, 18, "ad"}, /* ide9 */ 161 162 {8, 4, 0, "da"}, /* scsi disk0 */ 163 {65, 4, 16, "da"}, /* scsi disk1 */ 164 {66, 4, 32, "da"}, /* scsi disk2 */ 165 {67, 4, 48, "da"}, /* scsi disk3 */ 166 {68, 4, 64, "da"}, /* scsi disk4 */ 167 {69, 4, 80, "da"}, /* scsi disk5 */ 168 {70, 4, 96, "da"}, /* scsi disk6 */ 169 {71, 4, 112, "da"}, /* scsi disk7 */ 170 {128, 4, 128, "da"}, /* scsi disk8 */ 171 {129, 4, 144, "da"}, /* scsi disk9 */ 172 {130, 4, 160, "da"}, /* scsi disk10 */ 173 {131, 4, 176, "da"}, /* scsi disk11 */ 174 {132, 4, 192, "da"}, /* scsi disk12 */ 175 {133, 4, 208, "da"}, /* scsi disk13 */ 176 {134, 4, 224, "da"}, /* scsi disk14 */ 177 {135, 4, 240, "da"}, /* scsi disk15 */ 178 179 {202, 4, 0, "xbd"}, /* xbd */ 180 181 {0, 0, 0, NULL}, 182 }; 183 int major = vdevice >> 8; 184 int minor = vdevice & 0xff; 185 int i; 186 187 if (vdevice & (1 << 28)) { 188 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 189 *name = "xbd"; 190 } 191 192 for (i = 0; info[i].major; i++) { 193 if (info[i].major == major) { 194 *unit = info[i].base + (minor >> info[i].shift); 195 *name = info[i].name; 196 return; 197 } 198 } 199 200 *unit = minor >> 4; 201 *name = "xbd"; 202 } 203 204 int 205 xlvbd_add(struct xb_softc *sc, blkif_sector_t capacity, 206 int vdevice, uint16_t vdisk_info, uint16_t sector_size) 207 { 208 int unit, error = 0; 209 const char *name; 210 211 blkfront_vdevice_to_unit(vdevice, &unit, &name); 212 213 sc->xb_unit = unit; 214 215 if (strcmp(name, "xbd")) 216 device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit); 217 218 memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); 219 sc->xb_disk = disk_alloc(); 220 sc->xb_disk->d_unit = sc->xb_unit; 221 sc->xb_disk->d_open = blkif_open; 222 sc->xb_disk->d_close = blkif_close; 223 sc->xb_disk->d_ioctl = blkif_ioctl; 224 sc->xb_disk->d_strategy = xb_strategy; 225 sc->xb_disk->d_dump = xb_dump; 226 sc->xb_disk->d_name = name; 227 sc->xb_disk->d_drv1 = sc; 228 sc->xb_disk->d_sectorsize = sector_size; 229 230 sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; 231 sc->xb_disk->d_maxsize = BLKIF_MAXIO; 232 sc->xb_disk->d_flags = 0; 233 disk_create(sc->xb_disk, DISK_VERSION_00); 234 235 return error; 236 } 237 238 void 239 xlvbd_del(struct xb_softc *sc) 240 { 241 242 disk_destroy(sc->xb_disk); 243 } 244 /************************ end VBD support *****************/ 245 246 /* 247 * Read/write routine for a buffer. Finds the proper unit, place it on 248 * the sortq and kick the controller. 249 */ 250 static void 251 xb_strategy(struct bio *bp) 252 { 253 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 254 255 /* bogus disk? */ 256 if (sc == NULL) { 257 bp->bio_error = EINVAL; 258 bp->bio_flags |= BIO_ERROR; 259 bp->bio_resid = bp->bio_bcount; 260 biodone(bp); 261 return; 262 } 263 264 /* 265 * Place it in the queue of disk activities for this disk 266 */ 267 mtx_lock(&sc->xb_io_lock); 268 269 xb_enqueue_bio(sc, bp); 270 xb_startio(sc); 271 272 mtx_unlock(&sc->xb_io_lock); 273 return; 274 } 275 276 static void 277 xb_bio_complete(struct xb_softc *sc, struct xb_command *cm) 278 { 279 struct bio *bp; 280 281 bp = cm->bp; 282 283 if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) { 284 disk_err(bp, "disk error" , -1, 0); 285 printf(" status: %x\n", cm->status); 286 bp->bio_flags |= BIO_ERROR; 287 } 288 289 if (bp->bio_flags & BIO_ERROR) 290 bp->bio_error = EIO; 291 else 292 bp->bio_resid = 0; 293 294 xb_free_command(cm); 295 biodone(bp); 296 } 297 298 // Quiesce the disk writes for a dump file before allowing the next buffer. 299 static void 300 xb_quiesce(struct xb_softc *sc) 301 { 302 int mtd; 303 304 // While there are outstanding requests 305 while (!TAILQ_EMPTY(&sc->cm_busy)) { 306 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd); 307 if (mtd) { 308 /* Recieved request completions, update queue. */ 309 blkif_int(sc); 310 } 311 if (!TAILQ_EMPTY(&sc->cm_busy)) { 312 /* 313 * Still pending requests, wait for the disk i/o 314 * to complete. 315 */ 316 HYPERVISOR_yield(); 317 } 318 } 319 } 320 321 /* Kernel dump function for a paravirtualized disk device */ 322 static void 323 xb_dump_complete(struct xb_command *cm) 324 { 325 326 xb_enqueue_complete(cm); 327 } 328 329 static int 330 xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, 331 size_t length) 332 { 333 struct disk *dp = arg; 334 struct xb_softc *sc = (struct xb_softc *) dp->d_drv1; 335 struct xb_command *cm; 336 size_t chunk; 337 int sbp; 338 int rc = 0; 339 340 if (length <= 0) 341 return (rc); 342 343 xb_quiesce(sc); /* All quiet on the western front. */ 344 345 /* 346 * If this lock is held, then this module is failing, and a 347 * successful kernel dump is highly unlikely anyway. 348 */ 349 mtx_lock(&sc->xb_io_lock); 350 351 /* Split the 64KB block as needed */ 352 for (sbp=0; length > 0; sbp++) { 353 cm = xb_dequeue_free(sc); 354 if (cm == NULL) { 355 mtx_unlock(&sc->xb_io_lock); 356 device_printf(sc->xb_dev, "dump: no more commands?\n"); 357 return (EBUSY); 358 } 359 360 if (gnttab_alloc_grant_references( 361 BLKIF_MAX_SEGMENTS_PER_REQUEST, &cm->gref_head) < 0) { 362 xb_free_command(cm); 363 mtx_unlock(&sc->xb_io_lock); 364 device_printf(sc->xb_dev, "no more grant allocs?\n"); 365 return (EBUSY); 366 } 367 368 chunk = length > BLKIF_MAXIO ? BLKIF_MAXIO : length; 369 cm->data = virtual; 370 cm->datalen = chunk; 371 cm->operation = BLKIF_OP_WRITE; 372 cm->sector_number = offset / dp->d_sectorsize; 373 cm->cm_complete = xb_dump_complete; 374 375 xb_enqueue_ready(cm); 376 377 length -= chunk; 378 offset += chunk; 379 virtual = (char *) virtual + chunk; 380 } 381 382 /* Tell DOM0 to do the I/O */ 383 xb_startio(sc); 384 mtx_unlock(&sc->xb_io_lock); 385 386 /* Poll for the completion. */ 387 xb_quiesce(sc); /* All quite on the eastern front */ 388 389 /* If there were any errors, bail out... */ 390 while ((cm = xb_dequeue_complete(sc)) != NULL) { 391 if (cm->status != BLKIF_RSP_OKAY) { 392 device_printf(sc->xb_dev, 393 "Dump I/O failed at sector %jd\n", 394 cm->sector_number); 395 rc = EIO; 396 } 397 xb_free_command(cm); 398 } 399 400 return (rc); 401 } 402 403 404 static int 405 blkfront_probe(device_t dev) 406 { 407 408 if (!strcmp(xenbus_get_type(dev), "vbd")) { 409 device_set_desc(dev, "Virtual Block Device"); 410 device_quiet(dev); 411 return (0); 412 } 413 414 return (ENXIO); 415 } 416 417 /* 418 * Setup supplies the backend dir, virtual device. We place an event 419 * channel and shared frame entries. We watch backend to wait if it's 420 * ok. 421 */ 422 static int 423 blkfront_attach(device_t dev) 424 { 425 struct xb_softc *sc; 426 struct xb_command *cm; 427 const char *name; 428 int error, vdevice, i, unit; 429 430 /* FIXME: Use dynamic device id if this is not set. */ 431 error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), 432 "virtual-device", NULL, "%i", &vdevice); 433 if (error) { 434 xenbus_dev_fatal(dev, error, "reading virtual-device"); 435 printf("couldn't find virtual device"); 436 return (error); 437 } 438 439 blkfront_vdevice_to_unit(vdevice, &unit, &name); 440 if (!strcmp(name, "xbd")) 441 device_set_unit(dev, unit); 442 443 sc = device_get_softc(dev); 444 mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF); 445 xb_initq_free(sc); 446 xb_initq_busy(sc); 447 xb_initq_ready(sc); 448 xb_initq_complete(sc); 449 xb_initq_bio(sc); 450 451 /* Allocate parent DMA tag */ 452 if (bus_dma_tag_create( NULL, /* parent */ 453 512, 4096, /* algnmnt, boundary */ 454 BUS_SPACE_MAXADDR, /* lowaddr */ 455 BUS_SPACE_MAXADDR, /* highaddr */ 456 NULL, NULL, /* filter, filterarg */ 457 BLKIF_MAXIO, /* maxsize */ 458 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* nsegments */ 459 PAGE_SIZE, /* maxsegsize */ 460 BUS_DMA_ALLOCNOW, /* flags */ 461 busdma_lock_mutex, /* lockfunc */ 462 &sc->xb_io_lock, /* lockarg */ 463 &sc->xb_io_dmat)) { 464 device_printf(dev, "Cannot allocate parent DMA tag\n"); 465 return (ENOMEM); 466 } 467 #ifdef notyet 468 if (bus_dma_tag_set(sc->xb_io_dmat, BUS_DMA_SET_MINSEGSZ, 469 XBD_SECTOR_SIZE)) { 470 device_printf(dev, "Cannot set sector size\n"); 471 return (EINVAL); 472 } 473 #endif 474 475 sc->xb_dev = dev; 476 sc->vdevice = vdevice; 477 sc->connected = BLKIF_STATE_DISCONNECTED; 478 479 /* work queue needed ? */ 480 for (i = 0; i < BLK_RING_SIZE; i++) { 481 cm = &sc->shadow[i]; 482 cm->req.id = i; 483 cm->cm_sc = sc; 484 if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0) 485 break; 486 xb_free_command(cm); 487 } 488 489 /* Front end dir is a number, which is used as the id. */ 490 sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); 491 492 error = talk_to_backend(sc); 493 if (error) 494 return (error); 495 496 return (0); 497 } 498 499 static int 500 blkfront_suspend(device_t dev) 501 { 502 struct xb_softc *sc = device_get_softc(dev); 503 504 /* Prevent new requests being issued until we fix things up. */ 505 mtx_lock(&sc->xb_io_lock); 506 sc->connected = BLKIF_STATE_SUSPENDED; 507 mtx_unlock(&sc->xb_io_lock); 508 509 return (0); 510 } 511 512 static int 513 blkfront_resume(device_t dev) 514 { 515 struct xb_softc *sc = device_get_softc(dev); 516 int err; 517 518 DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev)); 519 520 blkif_free(sc, 1); 521 err = talk_to_backend(sc); 522 if (sc->connected == BLKIF_STATE_SUSPENDED && !err) 523 blkif_recover(sc); 524 525 return (err); 526 } 527 528 /* Common code used when first setting up, and when resuming. */ 529 static int 530 talk_to_backend(struct xb_softc *sc) 531 { 532 device_t dev; 533 struct xenbus_transaction xbt; 534 const char *message = NULL; 535 int err; 536 537 /* Create shared ring, alloc event channel. */ 538 dev = sc->xb_dev; 539 err = setup_blkring(sc); 540 if (err) 541 goto out; 542 543 again: 544 err = xenbus_transaction_start(&xbt); 545 if (err) { 546 xenbus_dev_fatal(dev, err, "starting transaction"); 547 goto destroy_blkring; 548 } 549 550 err = xenbus_printf(xbt, xenbus_get_node(dev), 551 "ring-ref","%u", sc->ring_ref); 552 if (err) { 553 message = "writing ring-ref"; 554 goto abort_transaction; 555 } 556 err = xenbus_printf(xbt, xenbus_get_node(dev), 557 "event-channel", "%u", irq_to_evtchn_port(sc->irq)); 558 if (err) { 559 message = "writing event-channel"; 560 goto abort_transaction; 561 } 562 err = xenbus_printf(xbt, xenbus_get_node(dev), 563 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 564 if (err) { 565 message = "writing protocol"; 566 goto abort_transaction; 567 } 568 569 err = xenbus_transaction_end(xbt, 0); 570 if (err) { 571 if (err == EAGAIN) 572 goto again; 573 xenbus_dev_fatal(dev, err, "completing transaction"); 574 goto destroy_blkring; 575 } 576 xenbus_set_state(dev, XenbusStateInitialised); 577 578 return 0; 579 580 abort_transaction: 581 xenbus_transaction_end(xbt, 1); 582 if (message) 583 xenbus_dev_fatal(dev, err, "%s", message); 584 destroy_blkring: 585 blkif_free(sc, 0); 586 out: 587 return err; 588 } 589 590 static int 591 setup_blkring(struct xb_softc *sc) 592 { 593 blkif_sring_t *sring; 594 int error; 595 596 sc->ring_ref = GRANT_INVALID_REF; 597 598 sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); 599 if (sring == NULL) { 600 xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring"); 601 return ENOMEM; 602 } 603 SHARED_RING_INIT(sring); 604 FRONT_RING_INIT(&sc->ring, sring, PAGE_SIZE); 605 606 error = xenbus_grant_ring(sc->xb_dev, 607 (vtomach(sc->ring.sring) >> PAGE_SHIFT), &sc->ring_ref); 608 if (error) { 609 free(sring, M_DEVBUF); 610 sc->ring.sring = NULL; 611 goto fail; 612 } 613 614 error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(sc->xb_dev), 615 "xbd", (driver_intr_t *)blkif_int, sc, 616 INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq); 617 if (error) { 618 xenbus_dev_fatal(sc->xb_dev, error, 619 "bind_evtchn_to_irqhandler failed"); 620 goto fail; 621 } 622 623 return (0); 624 fail: 625 blkif_free(sc, 0); 626 return (error); 627 } 628 629 630 /** 631 * Callback received when the backend's state changes. 632 */ 633 static int 634 blkfront_backend_changed(device_t dev, XenbusState backend_state) 635 { 636 struct xb_softc *sc = device_get_softc(dev); 637 638 DPRINTK("backend_state=%d\n", backend_state); 639 640 switch (backend_state) { 641 case XenbusStateUnknown: 642 case XenbusStateInitialising: 643 case XenbusStateInitWait: 644 case XenbusStateInitialised: 645 case XenbusStateClosed: 646 case XenbusStateReconfigured: 647 case XenbusStateReconfiguring: 648 break; 649 650 case XenbusStateConnected: 651 connect(sc); 652 break; 653 654 case XenbusStateClosing: 655 if (sc->users > 0) 656 xenbus_dev_error(dev, -EBUSY, 657 "Device in use; refusing to close"); 658 else 659 blkfront_closing(dev); 660 #ifdef notyet 661 bd = bdget(sc->dev); 662 if (bd == NULL) 663 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 664 665 down(&bd->bd_sem); 666 if (sc->users > 0) 667 xenbus_dev_error(dev, -EBUSY, 668 "Device in use; refusing to close"); 669 else 670 blkfront_closing(dev); 671 up(&bd->bd_sem); 672 bdput(bd); 673 #endif 674 } 675 676 return (0); 677 } 678 679 /* 680 ** Invoked when the backend is finally 'ready' (and has told produced 681 ** the details about the physical device - #sectors, size, etc). 682 */ 683 static void 684 connect(struct xb_softc *sc) 685 { 686 device_t dev = sc->xb_dev; 687 unsigned long sectors, sector_size; 688 unsigned int binfo; 689 int err, feature_barrier; 690 691 if( (sc->connected == BLKIF_STATE_CONNECTED) || 692 (sc->connected == BLKIF_STATE_SUSPENDED) ) 693 return; 694 695 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); 696 697 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 698 "sectors", "%lu", §ors, 699 "info", "%u", &binfo, 700 "sector-size", "%lu", §or_size, 701 NULL); 702 if (err) { 703 xenbus_dev_fatal(dev, err, 704 "reading backend fields at %s", 705 xenbus_get_otherend_path(dev)); 706 return; 707 } 708 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 709 "feature-barrier", "%lu", &feature_barrier, 710 NULL); 711 if (!err || feature_barrier) 712 sc->xb_flags |= XB_BARRIER; 713 714 device_printf(dev, "%juMB <%s> at %s", 715 (uintmax_t) sectors / (1048576 / sector_size), 716 device_get_desc(dev), 717 xenbus_get_node(dev)); 718 bus_print_child_footer(device_get_parent(dev), dev); 719 720 xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size); 721 722 (void)xenbus_set_state(dev, XenbusStateConnected); 723 724 /* Kick pending requests. */ 725 mtx_lock(&sc->xb_io_lock); 726 sc->connected = BLKIF_STATE_CONNECTED; 727 xb_startio(sc); 728 sc->xb_flags |= XB_READY; 729 mtx_unlock(&sc->xb_io_lock); 730 731 } 732 733 /** 734 * Handle the change of state of the backend to Closing. We must delete our 735 * device-layer structures now, to ensure that writes are flushed through to 736 * the backend. Once this is done, we can switch to Closed in 737 * acknowledgement. 738 */ 739 static void 740 blkfront_closing(device_t dev) 741 { 742 struct xb_softc *sc = device_get_softc(dev); 743 744 DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); 745 746 if (sc->mi) { 747 DPRINTK("Calling xlvbd_del\n"); 748 xlvbd_del(sc); 749 sc->mi = NULL; 750 } 751 752 xenbus_set_state(dev, XenbusStateClosed); 753 } 754 755 756 static int 757 blkfront_detach(device_t dev) 758 { 759 struct xb_softc *sc = device_get_softc(dev); 760 761 DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); 762 763 blkif_free(sc, 0); 764 mtx_destroy(&sc->xb_io_lock); 765 766 return 0; 767 } 768 769 770 static inline void 771 flush_requests(struct xb_softc *sc) 772 { 773 int notify; 774 775 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify); 776 777 if (notify) 778 notify_remote_via_irq(sc->irq); 779 } 780 781 static void blkif_restart_queue_callback(void *arg) 782 { 783 struct xb_softc *sc = arg; 784 785 xb_startio(sc); 786 } 787 788 static int 789 blkif_open(struct disk *dp) 790 { 791 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 792 793 if (sc == NULL) { 794 printf("xb%d: not found", sc->xb_unit); 795 return (ENXIO); 796 } 797 798 sc->xb_flags |= XB_OPEN; 799 sc->users++; 800 return (0); 801 } 802 803 static int 804 blkif_close(struct disk *dp) 805 { 806 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 807 808 if (sc == NULL) 809 return (ENXIO); 810 sc->xb_flags &= ~XB_OPEN; 811 if (--(sc->users) == 0) { 812 /* Check whether we have been instructed to close. We will 813 have ignored this request initially, as the device was 814 still mounted. */ 815 device_t dev = sc->xb_dev; 816 XenbusState state = 817 xenbus_read_driver_state(xenbus_get_otherend_path(dev)); 818 819 if (state == XenbusStateClosing) 820 blkfront_closing(dev); 821 } 822 return (0); 823 } 824 825 static int 826 blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 827 { 828 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 829 830 if (sc == NULL) 831 return (ENXIO); 832 833 return (ENOTTY); 834 } 835 836 static void 837 xb_free_command(struct xb_command *cm) 838 { 839 840 KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0, 841 ("Freeing command that is still on a queue\n")); 842 843 cm->cm_flags = 0; 844 cm->bp = NULL; 845 cm->cm_complete = NULL; 846 xb_enqueue_free(cm); 847 } 848 849 /* 850 * blkif_queue_request 851 * 852 * request block io 853 * 854 * id: for guest use only. 855 * operation: BLKIF_OP_{READ,WRITE,PROBE} 856 * buffer: buffer to read/write into. this should be a 857 * virtual address in the guest os. 858 */ 859 static struct xb_command * 860 xb_bio_command(struct xb_softc *sc) 861 { 862 struct xb_command *cm; 863 struct bio *bp; 864 865 if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) 866 return (NULL); 867 868 bp = xb_dequeue_bio(sc); 869 if (bp == NULL) 870 return (NULL); 871 872 if ((cm = xb_dequeue_free(sc)) == NULL) { 873 xb_requeue_bio(sc, bp); 874 return (NULL); 875 } 876 877 if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, 878 &cm->gref_head) < 0) { 879 gnttab_request_free_callback(&sc->callback, 880 blkif_restart_queue_callback, sc, 881 BLKIF_MAX_SEGMENTS_PER_REQUEST); 882 xb_requeue_bio(sc, bp); 883 xb_enqueue_free(cm); 884 sc->xb_flags |= XB_FROZEN; 885 return (NULL); 886 } 887 888 /* XXX Can we grab refs before doing the load so that the ref can 889 * be filled out here? 890 */ 891 cm->bp = bp; 892 cm->data = bp->bio_data; 893 cm->datalen = bp->bio_bcount; 894 cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : 895 BLKIF_OP_WRITE; 896 cm->sector_number = (blkif_sector_t)bp->bio_pblkno; 897 898 return (cm); 899 } 900 901 static int 902 blkif_queue_request(struct xb_softc *sc, struct xb_command *cm) 903 { 904 int error; 905 906 error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen, 907 blkif_queue_cb, cm, 0); 908 if (error == EINPROGRESS) { 909 printf("EINPROGRESS\n"); 910 sc->xb_flags |= XB_FROZEN; 911 cm->cm_flags |= XB_CMD_FROZEN; 912 return (0); 913 } 914 915 return (error); 916 } 917 918 static void 919 blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 920 { 921 struct xb_softc *sc; 922 struct xb_command *cm; 923 blkif_request_t *ring_req; 924 vm_paddr_t buffer_ma; 925 uint64_t fsect, lsect; 926 int ref, i, op; 927 928 cm = arg; 929 sc = cm->cm_sc; 930 931 if (error) { 932 printf("error %d in blkif_queue_cb\n", error); 933 cm->bp->bio_error = EIO; 934 biodone(cm->bp); 935 xb_free_command(cm); 936 return; 937 } 938 939 /* Fill out a communications ring structure. */ 940 ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt); 941 if (ring_req == NULL) { 942 /* XXX Is this possible? */ 943 printf("ring_req NULL, requeuing\n"); 944 xb_enqueue_ready(cm); 945 return; 946 } 947 ring_req->id = cm->req.id; 948 ring_req->operation = cm->operation; 949 ring_req->sector_number = cm->sector_number; 950 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; 951 ring_req->nr_segments = nsegs; 952 953 for (i = 0; i < nsegs; i++) { 954 buffer_ma = segs[i].ds_addr; 955 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 956 lsect = fsect + (segs[i].ds_len >> XBD_SECTOR_SHFT) - 1; 957 958 KASSERT(lsect <= 7, 959 ("XEN disk driver data cannot cross a page boundary")); 960 961 /* install a grant reference. */ 962 ref = gnttab_claim_grant_reference(&cm->gref_head); 963 KASSERT( ref >= 0, ("grant_reference failed") ); 964 965 gnttab_grant_foreign_access_ref( 966 ref, 967 xenbus_get_otherend_id(sc->xb_dev), 968 buffer_ma >> PAGE_SHIFT, 969 ring_req->operation & 1 ); /* ??? */ 970 971 ring_req->seg[i] = 972 (struct blkif_request_segment) { 973 .gref = ref, 974 .first_sect = fsect, 975 .last_sect = lsect }; 976 } 977 978 979 if (cm->operation == BLKIF_OP_READ) 980 op = BUS_DMASYNC_PREREAD; 981 else if (cm->operation == BLKIF_OP_WRITE) 982 op = BUS_DMASYNC_PREWRITE; 983 else 984 op = 0; 985 bus_dmamap_sync(sc->xb_io_dmat, cm->map, op); 986 987 sc->ring.req_prod_pvt++; 988 989 /* Keep a private copy so we can reissue requests when recovering. */ 990 cm->req = *ring_req; 991 992 xb_enqueue_busy(cm); 993 994 gnttab_free_grant_references(cm->gref_head); 995 996 /* 997 * This flag means that we're probably executing in the busdma swi 998 * instead of in the startio context, so an explicit flush is needed. 999 */ 1000 if (cm->cm_flags & XB_CMD_FROZEN) 1001 flush_requests(sc); 1002 1003 return; 1004 } 1005 1006 /* 1007 * Dequeue buffers and place them in the shared communication ring. 1008 * Return when no more requests can be accepted or all buffers have 1009 * been queued. 1010 * 1011 * Signal XEN once the ring has been filled out. 1012 */ 1013 static void 1014 xb_startio(struct xb_softc *sc) 1015 { 1016 struct xb_command *cm; 1017 int error, queued = 0; 1018 1019 mtx_assert(&sc->xb_io_lock, MA_OWNED); 1020 1021 while (!RING_FULL(&sc->ring)) { 1022 if (sc->xb_flags & XB_FROZEN) 1023 break; 1024 1025 cm = xb_dequeue_ready(sc); 1026 1027 if (cm == NULL) 1028 cm = xb_bio_command(sc); 1029 1030 if (cm == NULL) 1031 break; 1032 1033 if ((error = blkif_queue_request(sc, cm)) != 0) { 1034 printf("blkif_queue_request returned %d\n", error); 1035 break; 1036 } 1037 queued++; 1038 } 1039 1040 if (queued != 0) 1041 flush_requests(sc); 1042 } 1043 1044 static void 1045 blkif_int(void *xsc) 1046 { 1047 struct xb_softc *sc = xsc; 1048 struct xb_command *cm; 1049 blkif_response_t *bret; 1050 RING_IDX i, rp; 1051 int op; 1052 1053 mtx_lock(&sc->xb_io_lock); 1054 1055 if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) { 1056 mtx_unlock(&sc->xb_io_lock); 1057 return; 1058 } 1059 1060 again: 1061 rp = sc->ring.sring->rsp_prod; 1062 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1063 1064 for (i = sc->ring.rsp_cons; i != rp; i++) { 1065 bret = RING_GET_RESPONSE(&sc->ring, i); 1066 cm = &sc->shadow[bret->id]; 1067 1068 xb_remove_busy(cm); 1069 blkif_completion(cm); 1070 1071 if (cm->operation == BLKIF_OP_READ) 1072 op = BUS_DMASYNC_POSTREAD; 1073 else if (cm->operation == BLKIF_OP_WRITE) 1074 op = BUS_DMASYNC_POSTWRITE; 1075 else 1076 op = 0; 1077 bus_dmamap_sync(sc->xb_io_dmat, cm->map, op); 1078 bus_dmamap_unload(sc->xb_io_dmat, cm->map); 1079 1080 /* 1081 * If commands are completing then resources are probably 1082 * being freed as well. It's a cheap assumption even when 1083 * wrong. 1084 */ 1085 sc->xb_flags &= ~XB_FROZEN; 1086 1087 /* 1088 * Directly call the i/o complete routine to save an 1089 * an indirection in the common case. 1090 */ 1091 cm->status = bret->status; 1092 if (cm->bp) 1093 xb_bio_complete(sc, cm); 1094 else if (cm->cm_complete) 1095 (cm->cm_complete)(cm); 1096 else 1097 xb_free_command(cm); 1098 } 1099 1100 sc->ring.rsp_cons = i; 1101 1102 if (i != sc->ring.req_prod_pvt) { 1103 int more_to_do; 1104 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do); 1105 if (more_to_do) 1106 goto again; 1107 } else { 1108 sc->ring.sring->rsp_event = i + 1; 1109 } 1110 1111 xb_startio(sc); 1112 1113 mtx_unlock(&sc->xb_io_lock); 1114 } 1115 1116 static void 1117 blkif_free(struct xb_softc *sc, int suspend) 1118 { 1119 1120 /* Prevent new requests being issued until we fix things up. */ 1121 mtx_lock(&sc->xb_io_lock); 1122 sc->connected = suspend ? 1123 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 1124 mtx_unlock(&sc->xb_io_lock); 1125 1126 /* Free resources associated with old device channel. */ 1127 if (sc->ring_ref != GRANT_INVALID_REF) { 1128 gnttab_end_foreign_access(sc->ring_ref, 1129 sc->ring.sring); 1130 sc->ring_ref = GRANT_INVALID_REF; 1131 sc->ring.sring = NULL; 1132 } 1133 if (sc->irq) 1134 unbind_from_irqhandler(sc->irq); 1135 sc->irq = 0; 1136 1137 } 1138 1139 static void 1140 blkif_completion(struct xb_command *s) 1141 { 1142 int i; 1143 1144 for (i = 0; i < s->req.nr_segments; i++) 1145 gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); 1146 } 1147 1148 static void 1149 blkif_recover(struct xb_softc *sc) 1150 { 1151 /* 1152 * XXX The whole concept of not quiescing and completing all i/o 1153 * during suspend, and then hoping to recover and replay the 1154 * resulting abandoned I/O during resume, is laughable. At best, 1155 * it invalidates the i/o ordering rules required by just about 1156 * every filesystem, and at worst it'll corrupt data. The code 1157 * has been removed until further notice. 1158 */ 1159 } 1160 1161 /* ** Driver registration ** */ 1162 static device_method_t blkfront_methods[] = { 1163 /* Device interface */ 1164 DEVMETHOD(device_probe, blkfront_probe), 1165 DEVMETHOD(device_attach, blkfront_attach), 1166 DEVMETHOD(device_detach, blkfront_detach), 1167 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1168 DEVMETHOD(device_suspend, blkfront_suspend), 1169 DEVMETHOD(device_resume, blkfront_resume), 1170 1171 /* Xenbus interface */ 1172 DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed), 1173 1174 { 0, 0 } 1175 }; 1176 1177 static driver_t blkfront_driver = { 1178 "xbd", 1179 blkfront_methods, 1180 sizeof(struct xb_softc), 1181 }; 1182 devclass_t blkfront_devclass; 1183 1184 DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); 1185