1 /*- 2 * All rights reserved. 3 * 4 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 5 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 6 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 7 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 8 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 9 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 10 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 11 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 12 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 13 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 14 * SUCH DAMAGE. 15 * 16 */ 17 18 /* 19 * XenBSD block device driver 20 * 21 * Copyright (c) 2009 Frank Suchomel, Citrix 22 */ 23 24 #include <sys/cdefs.h> 25 __FBSDID("$FreeBSD$"); 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/malloc.h> 30 #include <sys/kernel.h> 31 #include <vm/vm.h> 32 #include <vm/pmap.h> 33 34 #include <sys/bio.h> 35 #include <sys/bus.h> 36 #include <sys/conf.h> 37 #include <sys/module.h> 38 39 #include <machine/bus.h> 40 #include <sys/rman.h> 41 #include <machine/resource.h> 42 #include <machine/intr_machdep.h> 43 #include <machine/vmparam.h> 44 45 #include <machine/xen/xen-os.h> 46 #include <machine/xen/xenfunc.h> 47 #include <xen/hypervisor.h> 48 #include <xen/xen_intr.h> 49 #include <xen/evtchn.h> 50 #include <xen/gnttab.h> 51 #include <xen/interface/grant_table.h> 52 #include <xen/interface/io/protocols.h> 53 #include <xen/xenbus/xenbusvar.h> 54 55 #include <geom/geom_disk.h> 56 57 #include <dev/xen/blkfront/block.h> 58 59 #include "xenbus_if.h" 60 61 #define ASSERT(S) KASSERT(S, (#S)) 62 /* prototypes */ 63 struct xb_softc; 64 static void xb_startio(struct xb_softc *sc); 65 static void connect(device_t, struct blkfront_info *); 66 static void blkfront_closing(device_t); 67 static int blkfront_detach(device_t); 68 static int talk_to_backend(device_t, struct blkfront_info *); 69 static int setup_blkring(device_t, struct blkfront_info *); 70 static void blkif_int(void *); 71 #if 0 72 static void blkif_restart_queue(void *arg); 73 #endif 74 static void blkif_recover(struct blkfront_info *); 75 static void blkif_completion(struct blk_shadow *); 76 static void blkif_free(struct blkfront_info *, int); 77 78 #define GRANT_INVALID_REF 0 79 #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 80 81 LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; 82 83 /* Control whether runtime update of vbds is enabled. */ 84 #define ENABLE_VBD_UPDATE 0 85 86 #if ENABLE_VBD_UPDATE 87 static void vbd_update(void); 88 #endif 89 90 91 #define BLKIF_STATE_DISCONNECTED 0 92 #define BLKIF_STATE_CONNECTED 1 93 #define BLKIF_STATE_SUSPENDED 2 94 95 #ifdef notyet 96 static char *blkif_state_name[] = { 97 [BLKIF_STATE_DISCONNECTED] = "disconnected", 98 [BLKIF_STATE_CONNECTED] = "connected", 99 [BLKIF_STATE_SUSPENDED] = "closed", 100 }; 101 102 static char * blkif_status_name[] = { 103 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 104 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 105 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 106 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 107 }; 108 #endif 109 #define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) 110 #if 0 111 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) 112 #else 113 #define DPRINTK(fmt, args...) 114 #endif 115 116 static grant_ref_t gref_head; 117 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 118 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 119 120 static void kick_pending_request_queues(struct blkfront_info *); 121 static int blkif_open(struct disk *dp); 122 static int blkif_close(struct disk *dp); 123 static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); 124 static int blkif_queue_request(struct bio *bp); 125 static void xb_strategy(struct bio *bp); 126 127 // In order to quiesce the device during kernel dumps, outstanding requests to 128 // DOM0 for disk reads/writes need to be accounted for. 129 static int blkif_queued_requests; 130 static int xb_dump(void *, void *, vm_offset_t, off_t, size_t); 131 132 133 /* XXX move to xb_vbd.c when VBD update support is added */ 134 #define MAX_VBDS 64 135 136 #define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ 137 #define XBD_SECTOR_SHFT 9 138 139 static struct mtx blkif_io_lock; 140 141 static vm_paddr_t 142 pfn_to_mfn(vm_paddr_t pfn) 143 { 144 return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); 145 } 146 147 /* 148 * Translate Linux major/minor to an appropriate name and unit 149 * number. For HVM guests, this allows us to use the same drive names 150 * with blkfront as the emulated drives, easing transition slightly. 151 */ 152 static void 153 blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name) 154 { 155 static struct vdev_info { 156 int major; 157 int shift; 158 int base; 159 const char *name; 160 } info[] = { 161 {3, 6, 0, "ad"}, /* ide0 */ 162 {22, 6, 2, "ad"}, /* ide1 */ 163 {33, 6, 4, "ad"}, /* ide2 */ 164 {34, 6, 6, "ad"}, /* ide3 */ 165 {56, 6, 8, "ad"}, /* ide4 */ 166 {57, 6, 10, "ad"}, /* ide5 */ 167 {88, 6, 12, "ad"}, /* ide6 */ 168 {89, 6, 14, "ad"}, /* ide7 */ 169 {90, 6, 16, "ad"}, /* ide8 */ 170 {91, 6, 18, "ad"}, /* ide9 */ 171 172 {8, 4, 0, "da"}, /* scsi disk0 */ 173 {65, 4, 16, "da"}, /* scsi disk1 */ 174 {66, 4, 32, "da"}, /* scsi disk2 */ 175 {67, 4, 48, "da"}, /* scsi disk3 */ 176 {68, 4, 64, "da"}, /* scsi disk4 */ 177 {69, 4, 80, "da"}, /* scsi disk5 */ 178 {70, 4, 96, "da"}, /* scsi disk6 */ 179 {71, 4, 112, "da"}, /* scsi disk7 */ 180 {128, 4, 128, "da"}, /* scsi disk8 */ 181 {129, 4, 144, "da"}, /* scsi disk9 */ 182 {130, 4, 160, "da"}, /* scsi disk10 */ 183 {131, 4, 176, "da"}, /* scsi disk11 */ 184 {132, 4, 192, "da"}, /* scsi disk12 */ 185 {133, 4, 208, "da"}, /* scsi disk13 */ 186 {134, 4, 224, "da"}, /* scsi disk14 */ 187 {135, 4, 240, "da"}, /* scsi disk15 */ 188 189 {202, 4, 0, "xbd"}, /* xbd */ 190 191 {0, 0, 0, NULL}, 192 }; 193 int major = vdevice >> 8; 194 int minor = vdevice & 0xff; 195 int i; 196 197 if (vdevice & (1 << 28)) { 198 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 199 *name = "xbd"; 200 } 201 202 for (i = 0; info[i].major; i++) { 203 if (info[i].major == major) { 204 *unit = info[i].base + (minor >> info[i].shift); 205 *name = info[i].name; 206 return; 207 } 208 } 209 210 *unit = minor >> 4; 211 *name = "xbd"; 212 } 213 214 int 215 xlvbd_add(device_t dev, blkif_sector_t capacity, 216 int vdevice, uint16_t vdisk_info, uint16_t sector_size, 217 struct blkfront_info *info) 218 { 219 struct xb_softc *sc; 220 int unit, error = 0; 221 const char *name; 222 223 blkfront_vdevice_to_unit(vdevice, &unit, &name); 224 225 sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 226 sc->xb_unit = unit; 227 sc->xb_info = info; 228 info->sc = sc; 229 230 if (strcmp(name, "xbd")) 231 device_printf(dev, "attaching as %s%d\n", name, unit); 232 233 memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); 234 sc->xb_disk = disk_alloc(); 235 sc->xb_disk->d_unit = sc->xb_unit; 236 sc->xb_disk->d_open = blkif_open; 237 sc->xb_disk->d_close = blkif_close; 238 sc->xb_disk->d_ioctl = blkif_ioctl; 239 sc->xb_disk->d_strategy = xb_strategy; 240 sc->xb_disk->d_dump = xb_dump; 241 sc->xb_disk->d_name = name; 242 sc->xb_disk->d_drv1 = sc; 243 sc->xb_disk->d_sectorsize = sector_size; 244 245 /* XXX */ 246 sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; 247 #if 0 248 sc->xb_disk->d_maxsize = DFLTPHYS; 249 #else /* XXX: xen can't handle large single i/o requests */ 250 sc->xb_disk->d_maxsize = 4096; 251 #endif 252 #ifdef notyet 253 XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", 254 xb_diskinfo[sc->xb_unit].device, sc->xb_unit, 255 sc->xb_disk->d_mediasize); 256 #endif 257 sc->xb_disk->d_flags = 0; 258 disk_create(sc->xb_disk, DISK_VERSION_00); 259 bioq_init(&sc->xb_bioq); 260 261 return error; 262 } 263 264 void 265 xlvbd_del(struct blkfront_info *info) 266 { 267 struct xb_softc *sc; 268 269 sc = info->sc; 270 disk_destroy(sc->xb_disk); 271 } 272 /************************ end VBD support *****************/ 273 274 /* 275 * Read/write routine for a buffer. Finds the proper unit, place it on 276 * the sortq and kick the controller. 277 */ 278 static void 279 xb_strategy(struct bio *bp) 280 { 281 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 282 283 /* bogus disk? */ 284 if (sc == NULL) { 285 bp->bio_error = EINVAL; 286 bp->bio_flags |= BIO_ERROR; 287 goto bad; 288 } 289 290 DPRINTK(""); 291 292 /* 293 * Place it in the queue of disk activities for this disk 294 */ 295 mtx_lock(&blkif_io_lock); 296 297 bioq_disksort(&sc->xb_bioq, bp); 298 xb_startio(sc); 299 300 mtx_unlock(&blkif_io_lock); 301 return; 302 303 bad: 304 /* 305 * Correctly set the bio to indicate a failed tranfer. 306 */ 307 bp->bio_resid = bp->bio_bcount; 308 biodone(bp); 309 return; 310 } 311 312 static void xb_quiesce(struct blkfront_info *info); 313 // Quiesce the disk writes for a dump file before allowing the next buffer. 314 static void 315 xb_quiesce(struct blkfront_info *info) 316 { 317 int mtd; 318 319 // While there are outstanding requests 320 while (blkif_queued_requests) { 321 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd); 322 if (mtd) { 323 // Recieved request completions, update queue. 324 blkif_int(info); 325 } 326 if (blkif_queued_requests) { 327 // Still pending requests, wait for the disk i/o to complete 328 HYPERVISOR_block(); 329 } 330 } 331 } 332 333 // Some bio structures for dumping core 334 #define DUMP_BIO_NO 16 // 16 * 4KB = 64KB dump block 335 static struct bio xb_dump_bp[DUMP_BIO_NO]; 336 337 // Kernel dump function for a paravirtualized disk device 338 static int 339 xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, 340 size_t length) 341 { 342 int sbp; 343 int mbp; 344 size_t chunk; 345 struct disk *dp = arg; 346 struct xb_softc *sc = (struct xb_softc *) dp->d_drv1; 347 int rc = 0; 348 349 xb_quiesce(sc->xb_info); // All quiet on the western front. 350 if (length > 0) { 351 // If this lock is held, then this module is failing, and a successful 352 // kernel dump is highly unlikely anyway. 353 mtx_lock(&blkif_io_lock); 354 // Split the 64KB block into 16 4KB blocks 355 for (sbp=0; length>0 && sbp<DUMP_BIO_NO; sbp++) { 356 chunk = length > PAGE_SIZE ? PAGE_SIZE : length; 357 xb_dump_bp[sbp].bio_disk = dp; 358 xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize; 359 xb_dump_bp[sbp].bio_bcount = chunk; 360 xb_dump_bp[sbp].bio_resid = chunk; 361 xb_dump_bp[sbp].bio_data = virtual; 362 xb_dump_bp[sbp].bio_cmd = BIO_WRITE; 363 xb_dump_bp[sbp].bio_done = NULL; 364 365 bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]); 366 367 length -= chunk; 368 offset += chunk; 369 virtual = (char *) virtual + chunk; 370 } 371 // Tell DOM0 to do the I/O 372 xb_startio(sc); 373 mtx_unlock(&blkif_io_lock); 374 375 // Must wait for the completion: the dump routine reuses the same 376 // 16 x 4KB buffer space. 377 xb_quiesce(sc->xb_info); // All quite on the eastern front 378 // If there were any errors, bail out... 379 for (mbp=0; mbp<sbp; mbp++) { 380 if ((rc = xb_dump_bp[mbp].bio_error)) break; 381 } 382 } 383 return (rc); 384 } 385 386 387 static int 388 blkfront_probe(device_t dev) 389 { 390 391 if (!strcmp(xenbus_get_type(dev), "vbd")) { 392 device_set_desc(dev, "Virtual Block Device"); 393 device_quiet(dev); 394 return (0); 395 } 396 397 return (ENXIO); 398 } 399 400 /* 401 * Setup supplies the backend dir, virtual device. We place an event 402 * channel and shared frame entries. We watch backend to wait if it's 403 * ok. 404 */ 405 static int 406 blkfront_attach(device_t dev) 407 { 408 int error, vdevice, i, unit; 409 struct blkfront_info *info; 410 const char *name; 411 412 /* FIXME: Use dynamic device id if this is not set. */ 413 error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), 414 "virtual-device", NULL, "%i", &vdevice); 415 if (error) { 416 xenbus_dev_fatal(dev, error, "reading virtual-device"); 417 printf("couldn't find virtual device"); 418 return (error); 419 } 420 421 blkfront_vdevice_to_unit(vdevice, &unit, &name); 422 if (!strcmp(name, "xbd")) 423 device_set_unit(dev, unit); 424 425 info = device_get_softc(dev); 426 427 /* 428 * XXX debug only 429 */ 430 for (i = 0; i < sizeof(*info); i++) 431 if (((uint8_t *)info)[i] != 0) 432 panic("non-null memory"); 433 434 info->shadow_free = 0; 435 info->xbdev = dev; 436 info->vdevice = vdevice; 437 info->connected = BLKIF_STATE_DISCONNECTED; 438 439 /* work queue needed ? */ 440 for (i = 0; i < BLK_RING_SIZE; i++) 441 info->shadow[i].req.id = i+1; 442 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 443 444 /* Front end dir is a number, which is used as the id. */ 445 info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); 446 447 error = talk_to_backend(dev, info); 448 if (error) 449 return (error); 450 451 return (0); 452 } 453 454 static int 455 blkfront_suspend(device_t dev) 456 { 457 struct blkfront_info *info = device_get_softc(dev); 458 459 /* Prevent new requests being issued until we fix things up. */ 460 mtx_lock(&blkif_io_lock); 461 info->connected = BLKIF_STATE_SUSPENDED; 462 mtx_unlock(&blkif_io_lock); 463 464 return (0); 465 } 466 467 static int 468 blkfront_resume(device_t dev) 469 { 470 struct blkfront_info *info = device_get_softc(dev); 471 int err; 472 473 DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev)); 474 475 blkif_free(info, 1); 476 err = talk_to_backend(dev, info); 477 if (info->connected == BLKIF_STATE_SUSPENDED && !err) 478 blkif_recover(info); 479 480 return (err); 481 } 482 483 /* Common code used when first setting up, and when resuming. */ 484 static int 485 talk_to_backend(device_t dev, struct blkfront_info *info) 486 { 487 const char *message = NULL; 488 struct xenbus_transaction xbt; 489 int err; 490 491 /* Create shared ring, alloc event channel. */ 492 err = setup_blkring(dev, info); 493 if (err) 494 goto out; 495 496 again: 497 err = xenbus_transaction_start(&xbt); 498 if (err) { 499 xenbus_dev_fatal(dev, err, "starting transaction"); 500 goto destroy_blkring; 501 } 502 503 err = xenbus_printf(xbt, xenbus_get_node(dev), 504 "ring-ref","%u", info->ring_ref); 505 if (err) { 506 message = "writing ring-ref"; 507 goto abort_transaction; 508 } 509 err = xenbus_printf(xbt, xenbus_get_node(dev), 510 "event-channel", "%u", irq_to_evtchn_port(info->irq)); 511 if (err) { 512 message = "writing event-channel"; 513 goto abort_transaction; 514 } 515 err = xenbus_printf(xbt, xenbus_get_node(dev), 516 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 517 if (err) { 518 message = "writing protocol"; 519 goto abort_transaction; 520 } 521 522 err = xenbus_transaction_end(xbt, 0); 523 if (err) { 524 if (err == EAGAIN) 525 goto again; 526 xenbus_dev_fatal(dev, err, "completing transaction"); 527 goto destroy_blkring; 528 } 529 xenbus_set_state(dev, XenbusStateInitialised); 530 531 return 0; 532 533 abort_transaction: 534 xenbus_transaction_end(xbt, 1); 535 if (message) 536 xenbus_dev_fatal(dev, err, "%s", message); 537 destroy_blkring: 538 blkif_free(info, 0); 539 out: 540 return err; 541 } 542 543 static int 544 setup_blkring(device_t dev, struct blkfront_info *info) 545 { 546 blkif_sring_t *sring; 547 int error; 548 549 info->ring_ref = GRANT_INVALID_REF; 550 551 sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); 552 if (sring == NULL) { 553 xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); 554 return ENOMEM; 555 } 556 SHARED_RING_INIT(sring); 557 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 558 559 error = xenbus_grant_ring(dev, 560 (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref); 561 if (error) { 562 free(sring, M_DEVBUF); 563 info->ring.sring = NULL; 564 goto fail; 565 } 566 567 error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), 568 "xbd", (driver_intr_t *)blkif_int, info, 569 INTR_TYPE_BIO | INTR_MPSAFE, &info->irq); 570 if (error) { 571 xenbus_dev_fatal(dev, error, 572 "bind_evtchn_to_irqhandler failed"); 573 goto fail; 574 } 575 576 return (0); 577 fail: 578 blkif_free(info, 0); 579 return (error); 580 } 581 582 583 /** 584 * Callback received when the backend's state changes. 585 */ 586 static int 587 blkfront_backend_changed(device_t dev, XenbusState backend_state) 588 { 589 struct blkfront_info *info = device_get_softc(dev); 590 591 DPRINTK("backend_state=%d\n", backend_state); 592 593 switch (backend_state) { 594 case XenbusStateUnknown: 595 case XenbusStateInitialising: 596 case XenbusStateInitWait: 597 case XenbusStateInitialised: 598 case XenbusStateClosed: 599 case XenbusStateReconfigured: 600 case XenbusStateReconfiguring: 601 break; 602 603 case XenbusStateConnected: 604 connect(dev, info); 605 break; 606 607 case XenbusStateClosing: 608 if (info->users > 0) 609 xenbus_dev_error(dev, -EBUSY, 610 "Device in use; refusing to close"); 611 else 612 blkfront_closing(dev); 613 #ifdef notyet 614 bd = bdget(info->dev); 615 if (bd == NULL) 616 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 617 618 down(&bd->bd_sem); 619 if (info->users > 0) 620 xenbus_dev_error(dev, -EBUSY, 621 "Device in use; refusing to close"); 622 else 623 blkfront_closing(dev); 624 up(&bd->bd_sem); 625 bdput(bd); 626 #endif 627 } 628 629 return (0); 630 } 631 632 /* 633 ** Invoked when the backend is finally 'ready' (and has told produced 634 ** the details about the physical device - #sectors, size, etc). 635 */ 636 static void 637 connect(device_t dev, struct blkfront_info *info) 638 { 639 unsigned long sectors, sector_size; 640 unsigned int binfo; 641 int err; 642 643 if( (info->connected == BLKIF_STATE_CONNECTED) || 644 (info->connected == BLKIF_STATE_SUSPENDED) ) 645 return; 646 647 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); 648 649 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 650 "sectors", "%lu", §ors, 651 "info", "%u", &binfo, 652 "sector-size", "%lu", §or_size, 653 NULL); 654 if (err) { 655 xenbus_dev_fatal(dev, err, 656 "reading backend fields at %s", 657 xenbus_get_otherend_path(dev)); 658 return; 659 } 660 err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), 661 "feature-barrier", "%lu", &info->feature_barrier, 662 NULL); 663 if (err) 664 info->feature_barrier = 0; 665 666 device_printf(dev, "%juMB <%s> at %s", 667 (uintmax_t) sectors / (1048576 / sector_size), 668 device_get_desc(dev), 669 xenbus_get_node(dev)); 670 bus_print_child_footer(device_get_parent(dev), dev); 671 672 xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info); 673 674 (void)xenbus_set_state(dev, XenbusStateConnected); 675 676 /* Kick pending requests. */ 677 mtx_lock(&blkif_io_lock); 678 info->connected = BLKIF_STATE_CONNECTED; 679 kick_pending_request_queues(info); 680 mtx_unlock(&blkif_io_lock); 681 info->is_ready = 1; 682 683 #if 0 684 add_disk(info->gd); 685 #endif 686 } 687 688 /** 689 * Handle the change of state of the backend to Closing. We must delete our 690 * device-layer structures now, to ensure that writes are flushed through to 691 * the backend. Once is this done, we can switch to Closed in 692 * acknowledgement. 693 */ 694 static void 695 blkfront_closing(device_t dev) 696 { 697 struct blkfront_info *info = device_get_softc(dev); 698 699 DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); 700 701 if (info->mi) { 702 DPRINTK("Calling xlvbd_del\n"); 703 xlvbd_del(info); 704 info->mi = NULL; 705 } 706 707 xenbus_set_state(dev, XenbusStateClosed); 708 } 709 710 711 static int 712 blkfront_detach(device_t dev) 713 { 714 struct blkfront_info *info = device_get_softc(dev); 715 716 DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); 717 718 blkif_free(info, 0); 719 720 return 0; 721 } 722 723 724 static inline int 725 GET_ID_FROM_FREELIST(struct blkfront_info *info) 726 { 727 unsigned long nfree = info->shadow_free; 728 729 KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); 730 info->shadow_free = info->shadow[nfree].req.id; 731 info->shadow[nfree].req.id = 0x0fffffee; /* debug */ 732 atomic_add_int(&blkif_queued_requests, 1); 733 return nfree; 734 } 735 736 static inline void 737 ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) 738 { 739 info->shadow[id].req.id = info->shadow_free; 740 info->shadow[id].request = 0; 741 info->shadow_free = id; 742 atomic_subtract_int(&blkif_queued_requests, 1); 743 } 744 745 static inline void 746 flush_requests(struct blkfront_info *info) 747 { 748 int notify; 749 750 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 751 752 if (notify) 753 notify_remote_via_irq(info->irq); 754 } 755 756 static void 757 kick_pending_request_queues(struct blkfront_info *info) 758 { 759 /* XXX check if we can't simplify */ 760 #if 0 761 if (!RING_FULL(&info->ring)) { 762 /* Re-enable calldowns. */ 763 blk_start_queue(info->rq); 764 /* Kick things off immediately. */ 765 do_blkif_request(info->rq); 766 } 767 #endif 768 if (!RING_FULL(&info->ring)) { 769 #if 0 770 sc = LIST_FIRST(&xbsl_head); 771 LIST_REMOVE(sc, entry); 772 /* Re-enable calldowns. */ 773 blk_start_queue(di->rq); 774 #endif 775 /* Kick things off immediately. */ 776 xb_startio(info->sc); 777 } 778 } 779 780 #if 0 781 /* XXX */ 782 static void blkif_restart_queue(void *arg) 783 { 784 struct blkfront_info *info = (struct blkfront_info *)arg; 785 786 mtx_lock(&blkif_io_lock); 787 kick_pending_request_queues(info); 788 mtx_unlock(&blkif_io_lock); 789 } 790 #endif 791 792 static void blkif_restart_queue_callback(void *arg) 793 { 794 #if 0 795 struct blkfront_info *info = (struct blkfront_info *)arg; 796 /* XXX BSD equiv ? */ 797 798 schedule_work(&info->work); 799 #endif 800 } 801 802 static int 803 blkif_open(struct disk *dp) 804 { 805 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 806 807 if (sc == NULL) { 808 printf("xb%d: not found", sc->xb_unit); 809 return (ENXIO); 810 } 811 812 sc->xb_flags |= XB_OPEN; 813 sc->xb_info->users++; 814 return (0); 815 } 816 817 static int 818 blkif_close(struct disk *dp) 819 { 820 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 821 822 if (sc == NULL) 823 return (ENXIO); 824 sc->xb_flags &= ~XB_OPEN; 825 if (--(sc->xb_info->users) == 0) { 826 /* Check whether we have been instructed to close. We will 827 have ignored this request initially, as the device was 828 still mounted. */ 829 device_t dev = sc->xb_info->xbdev; 830 XenbusState state = 831 xenbus_read_driver_state(xenbus_get_otherend_path(dev)); 832 833 if (state == XenbusStateClosing) 834 blkfront_closing(dev); 835 } 836 return (0); 837 } 838 839 static int 840 blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 841 { 842 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 843 844 if (sc == NULL) 845 return (ENXIO); 846 847 return (ENOTTY); 848 } 849 850 851 /* 852 * blkif_queue_request 853 * 854 * request block io 855 * 856 * id: for guest use only. 857 * operation: BLKIF_OP_{READ,WRITE,PROBE} 858 * buffer: buffer to read/write into. this should be a 859 * virtual address in the guest os. 860 */ 861 static int blkif_queue_request(struct bio *bp) 862 { 863 caddr_t alignbuf; 864 vm_paddr_t buffer_ma; 865 blkif_request_t *ring_req; 866 unsigned long id; 867 uint64_t fsect, lsect; 868 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 869 struct blkfront_info *info = sc->xb_info; 870 int ref; 871 872 if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) 873 return 1; 874 875 if (gnttab_alloc_grant_references( 876 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 877 gnttab_request_free_callback( 878 &info->callback, 879 blkif_restart_queue_callback, 880 info, 881 BLKIF_MAX_SEGMENTS_PER_REQUEST); 882 return 1; 883 } 884 885 /* Check if the buffer is properly aligned */ 886 if ((vm_offset_t)bp->bio_data & PAGE_MASK) { 887 int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : 888 PAGE_SIZE; 889 caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, 890 M_NOWAIT); 891 892 alignbuf = (char *)roundup2((u_long)newbuf, align); 893 894 /* save a copy of the current buffer */ 895 bp->bio_driver1 = newbuf; 896 bp->bio_driver2 = alignbuf; 897 898 /* Copy the data for a write */ 899 if (bp->bio_cmd == BIO_WRITE) 900 bcopy(bp->bio_data, alignbuf, bp->bio_bcount); 901 } else 902 alignbuf = bp->bio_data; 903 904 /* Fill out a communications ring structure. */ 905 ring_req = RING_GET_REQUEST(&info->ring, 906 info->ring.req_prod_pvt); 907 id = GET_ID_FROM_FREELIST(info); 908 info->shadow[id].request = (unsigned long)bp; 909 910 ring_req->id = id; 911 ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : 912 BLKIF_OP_WRITE; 913 914 ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; 915 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; 916 917 ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer 918 * chaining is not supported. 919 */ 920 921 buffer_ma = vtomach(alignbuf); 922 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 923 lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; 924 /* install a grant reference. */ 925 ref = gnttab_claim_grant_reference(&gref_head); 926 KASSERT( ref != -ENOSPC, ("grant_reference failed") ); 927 928 gnttab_grant_foreign_access_ref( 929 ref, 930 xenbus_get_otherend_id(info->xbdev), 931 buffer_ma >> PAGE_SHIFT, 932 ring_req->operation & 1 ); /* ??? */ 933 info->shadow[id].frame[ring_req->nr_segments] = 934 buffer_ma >> PAGE_SHIFT; 935 936 ring_req->seg[ring_req->nr_segments] = 937 (struct blkif_request_segment) { 938 .gref = ref, 939 .first_sect = fsect, 940 .last_sect = lsect }; 941 942 ring_req->nr_segments++; 943 KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, 944 ("XEN buffer must be sector aligned")); 945 KASSERT(lsect <= 7, 946 ("XEN disk driver data cannot cross a page boundary")); 947 948 buffer_ma &= ~PAGE_MASK; 949 950 info->ring.req_prod_pvt++; 951 952 /* Keep a private copy so we can reissue requests when recovering. */ 953 info->shadow[id].req = *ring_req; 954 955 gnttab_free_grant_references(gref_head); 956 957 return 0; 958 } 959 960 961 962 /* 963 * Dequeue buffers and place them in the shared communication ring. 964 * Return when no more requests can be accepted or all buffers have 965 * been queued. 966 * 967 * Signal XEN once the ring has been filled out. 968 */ 969 static void 970 xb_startio(struct xb_softc *sc) 971 { 972 struct bio *bp; 973 int queued = 0; 974 struct blkfront_info *info = sc->xb_info; 975 DPRINTK(""); 976 977 mtx_assert(&blkif_io_lock, MA_OWNED); 978 979 while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { 980 981 if (RING_FULL(&info->ring)) 982 goto wait; 983 984 if (blkif_queue_request(bp)) { 985 wait: 986 bioq_insert_head(&sc->xb_bioq, bp); 987 break; 988 } 989 queued++; 990 } 991 992 if (queued != 0) 993 flush_requests(sc->xb_info); 994 } 995 996 static void 997 blkif_int(void *xsc) 998 { 999 struct xb_softc *sc = NULL; 1000 struct bio *bp; 1001 blkif_response_t *bret; 1002 RING_IDX i, rp; 1003 struct blkfront_info *info = xsc; 1004 DPRINTK(""); 1005 1006 TRACE_ENTER; 1007 1008 mtx_lock(&blkif_io_lock); 1009 1010 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 1011 mtx_unlock(&blkif_io_lock); 1012 return; 1013 } 1014 1015 again: 1016 rp = info->ring.sring->rsp_prod; 1017 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1018 1019 for (i = info->ring.rsp_cons; i != rp; i++) { 1020 unsigned long id; 1021 1022 bret = RING_GET_RESPONSE(&info->ring, i); 1023 id = bret->id; 1024 bp = (struct bio *)info->shadow[id].request; 1025 1026 blkif_completion(&info->shadow[id]); 1027 1028 ADD_ID_TO_FREELIST(info, id); 1029 1030 switch (bret->operation) { 1031 case BLKIF_OP_READ: 1032 /* had an unaligned buffer that needs to be copied */ 1033 if (bp->bio_driver1) 1034 bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); 1035 /* FALLTHROUGH */ 1036 case BLKIF_OP_WRITE: 1037 1038 /* free the copy buffer */ 1039 if (bp->bio_driver1) { 1040 free(bp->bio_driver1, M_DEVBUF); 1041 bp->bio_driver1 = NULL; 1042 } 1043 1044 if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { 1045 printf("Bad return from blkdev data request: %x\n", 1046 bret->status); 1047 bp->bio_flags |= BIO_ERROR; 1048 } 1049 1050 sc = (struct xb_softc *)bp->bio_disk->d_drv1; 1051 1052 if (bp->bio_flags & BIO_ERROR) 1053 bp->bio_error = EIO; 1054 else 1055 bp->bio_resid = 0; 1056 1057 biodone(bp); 1058 break; 1059 default: 1060 panic("received invalid operation"); 1061 break; 1062 } 1063 } 1064 1065 info->ring.rsp_cons = i; 1066 1067 if (i != info->ring.req_prod_pvt) { 1068 int more_to_do; 1069 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 1070 if (more_to_do) 1071 goto again; 1072 } else { 1073 info->ring.sring->rsp_event = i + 1; 1074 } 1075 1076 kick_pending_request_queues(info); 1077 1078 mtx_unlock(&blkif_io_lock); 1079 } 1080 1081 static void 1082 blkif_free(struct blkfront_info *info, int suspend) 1083 { 1084 1085 /* Prevent new requests being issued until we fix things up. */ 1086 mtx_lock(&blkif_io_lock); 1087 info->connected = suspend ? 1088 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 1089 mtx_unlock(&blkif_io_lock); 1090 1091 /* Free resources associated with old device channel. */ 1092 if (info->ring_ref != GRANT_INVALID_REF) { 1093 gnttab_end_foreign_access(info->ring_ref, 1094 info->ring.sring); 1095 info->ring_ref = GRANT_INVALID_REF; 1096 info->ring.sring = NULL; 1097 } 1098 if (info->irq) 1099 unbind_from_irqhandler(info->irq); 1100 info->irq = 0; 1101 1102 } 1103 1104 static void 1105 blkif_completion(struct blk_shadow *s) 1106 { 1107 int i; 1108 1109 for (i = 0; i < s->req.nr_segments; i++) 1110 gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); 1111 } 1112 1113 static void 1114 blkif_recover(struct blkfront_info *info) 1115 { 1116 int i, j; 1117 blkif_request_t *req; 1118 struct blk_shadow *copy; 1119 1120 if (!info->sc) 1121 return; 1122 1123 /* Stage 1: Make a safe copy of the shadow state. */ 1124 copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO); 1125 memcpy(copy, info->shadow, sizeof(info->shadow)); 1126 1127 /* Stage 2: Set up free list. */ 1128 memset(&info->shadow, 0, sizeof(info->shadow)); 1129 for (i = 0; i < BLK_RING_SIZE; i++) 1130 info->shadow[i].req.id = i+1; 1131 info->shadow_free = info->ring.req_prod_pvt; 1132 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1133 1134 /* Stage 3: Find pending requests and requeue them. */ 1135 for (i = 0; i < BLK_RING_SIZE; i++) { 1136 /* Not in use? */ 1137 if (copy[i].request == 0) 1138 continue; 1139 1140 /* Grab a request slot and copy shadow state into it. */ 1141 req = RING_GET_REQUEST( 1142 &info->ring, info->ring.req_prod_pvt); 1143 *req = copy[i].req; 1144 1145 /* We get a new request id, and must reset the shadow state. */ 1146 req->id = GET_ID_FROM_FREELIST(info); 1147 memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 1148 1149 /* Rewrite any grant references invalidated by suspend/resume. */ 1150 for (j = 0; j < req->nr_segments; j++) 1151 gnttab_grant_foreign_access_ref( 1152 req->seg[j].gref, 1153 xenbus_get_otherend_id(info->xbdev), 1154 pfn_to_mfn(info->shadow[req->id].frame[j]), 1155 0 /* assume not readonly */); 1156 1157 info->shadow[req->id].req = *req; 1158 1159 info->ring.req_prod_pvt++; 1160 } 1161 1162 free(copy, M_DEVBUF); 1163 1164 xenbus_set_state(info->xbdev, XenbusStateConnected); 1165 1166 /* Now safe for us to use the shared ring */ 1167 mtx_lock(&blkif_io_lock); 1168 info->connected = BLKIF_STATE_CONNECTED; 1169 mtx_unlock(&blkif_io_lock); 1170 1171 /* Send off requeued requests */ 1172 mtx_lock(&blkif_io_lock); 1173 flush_requests(info); 1174 1175 /* Kick any other new requests queued since we resumed */ 1176 kick_pending_request_queues(info); 1177 mtx_unlock(&blkif_io_lock); 1178 } 1179 1180 /* ** Driver registration ** */ 1181 static device_method_t blkfront_methods[] = { 1182 /* Device interface */ 1183 DEVMETHOD(device_probe, blkfront_probe), 1184 DEVMETHOD(device_attach, blkfront_attach), 1185 DEVMETHOD(device_detach, blkfront_detach), 1186 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1187 DEVMETHOD(device_suspend, blkfront_suspend), 1188 DEVMETHOD(device_resume, blkfront_resume), 1189 1190 /* Xenbus interface */ 1191 DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed), 1192 1193 { 0, 0 } 1194 }; 1195 1196 static driver_t blkfront_driver = { 1197 "xbd", 1198 blkfront_methods, 1199 sizeof(struct blkfront_info), 1200 }; 1201 devclass_t blkfront_devclass; 1202 1203 DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); 1204 1205 MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */ 1206 1207