/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Frank Suchomel, Citrix
 * Copyright (c) 2009 Doug F. Rabson, Citrix
 * Copyright (c) 2005 Kip Macy
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

#define ASSERT(S)	KASSERT(S, (#S))

/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(device_t, struct blkfront_info *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int talk_to_backend(device_t, struct blkfront_info *);
static int setup_blkring(device_t, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF	0
#define BLK_RING_SIZE		__RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;
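/*
 * The shared ring occupies a single page; __RING_SIZE() (used in
 * BLK_RING_SIZE above) computes how many request/response slots fit in
 * that page, so the size of the shadow array and of the request-id
 * freelist used below is fixed at compile time.
 */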
/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE	0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED	0
#define BLKIF_STATE_CONNECTED		1
#define BLKIF_STATE_SUSPENDED		2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char *blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#define WPRINTK(fmt, args...)	printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) \
	printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);

/*
 * In order to quiesce the device during kernel dumps, outstanding requests to
 * DOM0 for disk reads/writes need to be accounted for.
 */
static int blkif_queued_requests;
static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS	64

#define XBD_SECTOR_SIZE	512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT	9

static struct mtx blkif_io_lock;

static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{

	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}
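/*
 * Illustrative example of the translation done below: a Linux-style
 * vdevice of 0x301 (major 3, minor 1, i.e. hda1 on ide0) matches the
 * {3, 6, 0, "ad"} entry, giving unit 0 + (1 >> 6) = 0 of "ad"; a native
 * vdevice of 0xca00 (major 202) maps straight to "xbd" unit 0.
 */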
/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		/* Extended ID range: the unit number is encoded directly. */
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}

int
xlvbd_add(device_t dev, blkif_sector_t capacity,
    int vdevice, uint16_t vdisk_info, uint16_t sector_size,
    struct blkfront_info *info)
{
	struct xb_softc *sc;
	int unit, error = 0;
	const char *name;

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	if (strcmp(name, "xbd"))
		device_printf(dev, "attaching as %s%d\n", name, unit);

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_dump = xb_dump;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
	    xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
	    sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return (error);
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc *sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}

/************************ end VBD support *****************/
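/*
 * Locking note: the per-softc bio queue and the shared ring are both
 * protected by blkif_io_lock; xb_strategy() below and the interrupt
 * handler blkif_int() take it before touching either.
 */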
/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sort queue and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);
	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}

static void xb_quiesce(struct blkfront_info *info);

/* Quiesce the disk writes for a dump file before allowing the next buffer. */
static void
xb_quiesce(struct blkfront_info *info)
{
	int mtd;

	/* While there are outstanding requests... */
	while (blkif_queued_requests) {
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd);
		if (mtd) {
			/* Received request completions, update queue. */
			blkif_int(info);
		}
		if (blkif_queued_requests) {
			/*
			 * Still pending requests, wait for the disk i/o
			 * to complete.
			 */
			HYPERVISOR_yield();
		}
	}
}

/* Some bio structures for dumping core. */
#define DUMP_BIO_NO	16	/* 16 * 4KB = 64KB dump block */
static struct bio xb_dump_bp[DUMP_BIO_NO];

/* Kernel dump function for a paravirtualized disk device. */
static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	int sbp;
	int mbp;
	size_t chunk;
	struct disk *dp = arg;
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
	int rc = 0;

	xb_quiesce(sc->xb_info);	/* All quiet on the western front. */

	if (length > 0) {
		/*
		 * If this lock is held, then this module is failing, and a
		 * successful kernel dump is highly unlikely anyway.
		 */
		mtx_lock(&blkif_io_lock);

		/* Split the 64KB block into 16 4KB blocks. */
		for (sbp = 0; length > 0 && sbp < DUMP_BIO_NO; sbp++) {
			chunk = length > PAGE_SIZE ? PAGE_SIZE : length;
			xb_dump_bp[sbp].bio_disk = dp;
			xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize;
			xb_dump_bp[sbp].bio_bcount = chunk;
			xb_dump_bp[sbp].bio_resid = chunk;
			xb_dump_bp[sbp].bio_data = virtual;
			xb_dump_bp[sbp].bio_cmd = BIO_WRITE;
			xb_dump_bp[sbp].bio_done = NULL;

			bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]);

			length -= chunk;
			offset += chunk;
			virtual = (char *)virtual + chunk;
		}

		/* Tell DOM0 to do the I/O. */
		xb_startio(sc);
		mtx_unlock(&blkif_io_lock);

		/*
		 * Must wait for the completion: the dump routine reuses the
		 * same 16 x 4KB buffer space.
		 */
		xb_quiesce(sc->xb_info);	/* All quiet on the eastern front. */

		/* If there were any errors, bail out... */
		for (mbp = 0; mbp < sbp; mbp++) {
			if ((rc = xb_dump_bp[mbp].bio_error) != 0)
				break;
		}
	}
	return (rc);
}

static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}
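/*
 * Request-id bookkeeping: info->shadow[] mirrors every in-flight ring
 * request.  Free entries form a singly linked list threaded through the
 * otherwise unused req.id fields, headed by info->shadow_free and
 * terminated by the 0x0fffffff sentinel set up in blkfront_attach()
 * below; GET_ID_FROM_FREELIST() and ADD_ID_TO_FREELIST() walk it.
 */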
/*
 * Setup supplies the backend dir, virtual device.  We place an event
 * channel and shared frame entries.  We watch the backend to wait until
 * it is ready.
 */
static int
blkfront_attach(device_t dev)
{
	int error, vdevice, i, unit;
	struct blkfront_info *info;
	const char *name;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (error) {
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		printf("couldn't find virtual device\n");
		return (error);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	info = device_get_softc(dev);

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i + 1;
	info->shadow[BLK_RING_SIZE - 1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(xenbus_get_node(dev), '/') + 1,
	    NULL, 0);

	error = talk_to_backend(dev, info);
	if (error)
		return (error);

	return (0);
}

static int
blkfront_suspend(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_SUSPENDED;
	mtx_unlock(&blkif_io_lock);

	return (0);
}

static int
blkfront_resume(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);
	int err;

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	blkif_free(info, 1);
	err = talk_to_backend(dev, info);
	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
		blkif_recover(info);

	return (err);
}

/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;
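	/*
	 * The handshake with the backend happens inside a xenstore
	 * transaction: advertise the grant reference of the shared ring,
	 * the event channel, and the ring protocol, then move the device
	 * to Initialised.  If the transaction races with another writer,
	 * xenbus_transaction_end() returns EAGAIN and we retry from the
	 * top.
	 */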
	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, xenbus_get_node(dev),
	    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
	    "event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, xenbus_get_node(dev),
	    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (err) {
		message = "writing protocol";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_set_state(dev, XenbusStateInitialised);

	return (0);

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return (err);
}

static int
setup_blkring(device_t dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int error;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return (ENOMEM);
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	error = xenbus_grant_ring(dev,
	    (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
	if (error) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}

	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
	    "xbd", (driver_intr_t *)blkif_int, info,
	    INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
	if (error) {
		xenbus_dev_fatal(dev, error,
		    "bind_listening_port_to_irqhandler failed");
		goto fail;
	}

	return (0);

 fail:
	blkif_free(info, 0);
	return (error);
}

/**
 * Callback received when the backend's state changes.
 */
static int
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;

	case XenbusStateConnected:
		connect(dev, info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
		break;
	}

	return (0);
}
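/*
 * Handshake summary: after talk_to_backend() publishes the ring and
 * event channel and sets this device Initialised, the backend probes
 * the physical device and moves itself to Connected, at which point
 * blkfront_backend_changed() calls connect() below to read the disk
 * geometry and create the disk(9) device.
 */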
/*
 * Invoked when the backend is finally 'ready' (and has published the
 * details about the physical device - #sectors, size, etc).
 */
static void
connect(device_t dev, struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
	    "sectors", "%lu", &sectors,
	    "info", "%u", &binfo,
	    "sector-size", "%lu", &sector_size,
	    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
	    "feature-barrier", "%lu", &info->feature_barrier,
	    NULL);
	if (err)
		info->feature_barrier = 0;

	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t)sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);

	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}

static int
blkfront_detach(device_t dev)
{
	struct blkfront_info *info = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

	blkif_free(info, 0);

	return (0);
}

static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee;	/* debug */
	atomic_add_int(&blkif_queued_requests, 1);
	return (nfree);
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
	atomic_subtract_int(&blkif_queued_requests, 1);
}

static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}
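/*
 * Note on notification: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() sets
 * 'notify' only when the new producer index crosses the event index the
 * backend last asked to be woken at, so the event channel is kicked
 * only when the backend may actually be idle; on a busy ring the
 * redundant hypercall is suppressed.
 */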
static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void
blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void
blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printf("xb: not found\n");
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		device_t dev = sc->xb_info->xbdev;
		XenbusState state =
		    xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int
blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return (1);

	if (gnttab_alloc_grant_references(
	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
		    &info->callback,
		    blkif_restart_queue_callback,
		    info,
		    BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return (1);
	}

	/* Check if the buffer is properly aligned. */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
		    PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
		    M_NOWAIT);

		/* Bail out and retry later if no bounce buffer is available. */
		if (newbuf == NULL) {
			gnttab_free_grant_references(gref_head);
			return (1);
		}

		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* Save a copy of the current buffer. */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write. */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;
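	/*
	 * Unaligned buffers are bounced through the aligned copy above:
	 * bio_driver1 remembers the malloc()ed region so blkif_int() can
	 * free it on completion, and bio_driver2 the aligned alias so a
	 * completed read can be copied back into the caller's buffer.
	 */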
	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id = id;
	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
	    BLKIF_OP_WRITE;

	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather
					 * since buffer chaining is not
					 * supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;

	/* Install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
	    ref,
	    xenbus_get_otherend_id(info->xbdev),
	    buffer_ma >> PAGE_SHIFT,
	    ring_req->operation & 1);	/* BLKIF_OP_WRITE == 1, so the
					 * grant is read-only for writes:
					 * the backend only reads the
					 * buffer in that case. */

	info->shadow[id].frame[ring_req->nr_segments] =
	    buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
	    (struct blkif_request_segment) {
		.gref       = ref,
		.first_sect = fsect,
		.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE - 1)) == 0,
	    ("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
	    ("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return (0);
}

/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio *bp;
	int queued = 0;
	struct blkfront_info *info = sc->xb_info;

	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {
		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
 wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;

	DPRINTK("");
	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb();	/* Ensure we see queued responses up to 'rp'. */
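	/*
	 * Consume responses from rsp_cons up to the snapshot 'rp' taken
	 * above; any responses that arrive after the rmb() are caught by
	 * the RING_FINAL_CHECK_FOR_RESPONSES() re-check at the bottom.
	 */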
	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id = bret->id;
		bp = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* Had an unaligned buffer that needs to be copied. */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data,
				    bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:
			/* Free the copy buffer. */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data "
				    "request: %x\n", bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;

		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}

static void
blkif_free(struct blkfront_info *info, int suspend)
{

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
	    BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
		    info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq);
	info->irq = 0;
}

static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}
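/*
 * Suspend/resume recovery: blkif_recover() replays every request that
 * was outstanding at suspend time.  The shadow copies saved by
 * blkif_queue_request() carry enough state (the request header plus the
 * frame number of each segment) to rebuild the ring entries and
 * re-establish the grant references on the new ring.
 */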
static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	if (!info->sc)
		return;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF,
	    M_NOWAIT|M_ZERO);
	if (copy == NULL)
		return;
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i + 1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE - 1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by susp/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
			    req->seg[j].gref,
			    xenbus_get_otherend_id(info->xbdev),
			    pfn_to_mfn(info->shadow[req->id].frame[j]),
			    0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_set_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;

	/* Send off requeued requests. */
	flush_requests(info);

	/* Kick any other new requests queued since we resumed. */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		blkfront_probe),
	DEVMETHOD(device_attach,	blkfront_attach),
	DEVMETHOD(device_detach,	blkfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	blkfront_suspend),
	DEVMETHOD(device_resume,	blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),

	{ 0, 0 }
};

static driver_t blkfront_driver = {
	"xbd",
	blkfront_methods,
	sizeof(struct blkfront_info),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS);	/* XXX how does one enroll a lock? */