/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Scott Long, Yahoo!
 * Copyright (c) 2009 Frank Suchomel, Citrix
 * Copyright (c) 2009 Doug F. Rabson, Citrix
 * Copyright (c) 2005 Kip Macy
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
#include <sys/bus_dma.h>

#include <machine/_inttypes.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

/* prototypes */
static void xb_free_command(struct xb_command *cm);
static void xb_startio(struct xb_softc *sc);
static void blkfront_connect(struct xb_softc *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
static void blkfront_initialize(struct xb_softc *);
#if 0
static void blkif_recover(struct xb_softc *);
#endif
static int blkif_completion(struct xb_command *);
static void blkif_free(struct xb_softc *, int);
static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);

MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
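/*
 * Sentinel stored in sc->ring_ref[] until xenbus_grant_ring() returns a
 * real grant reference; blkif_free() only revokes entries that differ
 * from this value.
 */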
#define GRANT_INVALID_REF 0

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
static void xb_strategy(struct bio *bp);

// In order to quiesce the device during kernel dumps, outstanding requests to
// DOM0 for disk reads/writes need to be accounted for.
static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
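/*
 * For illustration (values not taken from a real configuration):
 * vdevice 0x300 decodes to major 3, minor 0 and maps to "ad0" (ide0),
 * while vdevice 0x810 decodes to major 8, minor 16 and maps to "da1"
 * (base 0 + (16 >> 4)).
 */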
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}

int
xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
    int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
	int unit, error = 0;
	const char *name;

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc->xb_unit = unit;

	if (strcmp(name, "xbd"))
		device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_dump = xb_dump;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	sc->xb_disk->d_mediasize = sectors * sector_size;
	sc->xb_disk->d_maxsize = sc->max_request_size;
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);

	return error;
}

/************************ end VBD support *****************/
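/*
 * All normal I/O funnels through a single path: xb_strategy() queues the
 * incoming struct bio on the softc's bio queue, and xb_startio(), called
 * with xb_io_lock held, converts queued bios into blkif ring requests as
 * ring slots and grant references allow.
 */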
/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sort queue, and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
		return;
	}

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&sc->xb_io_lock);

	xb_enqueue_bio(sc, bp);
	xb_startio(sc);

	mtx_unlock(&sc->xb_io_lock);
	return;
}

static void
xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
{
	struct bio *bp;

	bp = cm->bp;

	if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) {
		disk_err(bp, "disk error" , -1, 0);
		printf(" status: %x\n", cm->status);
		bp->bio_flags |= BIO_ERROR;
	}

	if (bp->bio_flags & BIO_ERROR)
		bp->bio_error = EIO;
	else
		bp->bio_resid = 0;

	xb_free_command(cm);
	biodone(bp);
}

// Quiesce the disk writes for a dump file before allowing the next buffer.
static void
xb_quiesce(struct xb_softc *sc)
{
	int		mtd;

	// While there are outstanding requests
	while (!TAILQ_EMPTY(&sc->cm_busy)) {
		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
		if (mtd) {
			/* Received request completions, update queue. */
			blkif_int(sc);
		}
		if (!TAILQ_EMPTY(&sc->cm_busy)) {
			/*
			 * Still pending requests, wait for the disk i/o
			 * to complete.
			 */
			HYPERVISOR_yield();
		}
	}
}

/* Kernel dump function for a paravirtualized disk device */
static void
xb_dump_complete(struct xb_command *cm)
{

	xb_enqueue_complete(cm);
}

static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk	*dp = arg;
	struct xb_softc	*sc = (struct xb_softc *) dp->d_drv1;
	struct xb_command *cm;
	size_t		chunk;
	int		sbp;
	int		rc = 0;

	if (length <= 0)
		return (rc);

	xb_quiesce(sc);	/* All quiet on the western front. */

	/*
	 * If this lock is held, then this module is failing, and a
	 * successful kernel dump is highly unlikely anyway.
	 */
	mtx_lock(&sc->xb_io_lock);

	/* Split the 64KB block as needed */
	for (sbp=0; length > 0; sbp++) {
		cm = xb_dequeue_free(sc);
		if (cm == NULL) {
			mtx_unlock(&sc->xb_io_lock);
			device_printf(sc->xb_dev, "dump: no more commands?\n");
			return (EBUSY);
		}

		if (gnttab_alloc_grant_references(sc->max_request_segments,
		    &cm->gref_head) != 0) {
			xb_free_command(cm);
			mtx_unlock(&sc->xb_io_lock);
			device_printf(sc->xb_dev, "no more grant allocs?\n");
			return (EBUSY);
		}

		chunk = length > sc->max_request_size
		    ? sc->max_request_size : length;
		cm->data = virtual;
		cm->datalen = chunk;
		cm->operation = BLKIF_OP_WRITE;
		cm->sector_number = offset / dp->d_sectorsize;
		cm->cm_complete = xb_dump_complete;

		xb_enqueue_ready(cm);

		length -= chunk;
		offset += chunk;
		virtual = (char *) virtual + chunk;
	}

	/* Tell DOM0 to do the I/O */
	xb_startio(sc);
	mtx_unlock(&sc->xb_io_lock);

	/* Poll for the completion. */
	xb_quiesce(sc);	/* All quiet on the eastern front. */

	/* If there were any errors, bail out... */
	while ((cm = xb_dequeue_complete(sc)) != NULL) {
		if (cm->status != BLKIF_RSP_OKAY) {
			device_printf(sc->xb_dev,
			    "Dump I/O failed at sector %jd\n",
			    cm->sector_number);
			rc = EIO;
		}
		xb_free_command(cm);
	}

	return (rc);
}
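/*
 * XenBus device life cycle: blkfront_probe()/blkfront_attach() register
 * the device and move it to XenbusStateInitialising; once the backend
 * reaches InitWait, blkfront_backend_changed() invokes
 * blkfront_initialize() to negotiate parameters and publish the shared
 * ring, and blkfront_connect() brings the disk online when the backend
 * reports Connected.
 */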
static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}

/*
 * Read the virtual device number from the XenStore, set up the per-device
 * state and queues, and move the device to XenbusStateInitialising so the
 * backend will publish its capabilities.  The event channel and shared
 * ring are established later, in blkfront_initialize().
 */
static int
blkfront_attach(device_t dev)
{
	struct xb_softc *sc;
	const char *name;
	int error;
	int vdevice;
	int i;
	int unit;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xs_scanf(XST_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (error) {
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		device_printf(dev, "Couldn't determine virtual device.\n");
		return (error);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	sc = device_get_softc(dev);
	mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
	xb_initq_free(sc);
	xb_initq_busy(sc);
	xb_initq_ready(sc);
	xb_initq_complete(sc);
	xb_initq_bio(sc);
	for (i = 0; i < XBF_MAX_RING_PAGES; i++)
		sc->ring_ref[i] = GRANT_INVALID_REF;

	sc->xb_dev = dev;
	sc->vdevice = vdevice;
	sc->connected = BLKIF_STATE_DISCONNECTED;

	/* Front end dir is a number, which is used as the id. */
	sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);

	/* Wait for backend device to publish its protocol capabilities. */
	xenbus_set_state(dev, XenbusStateInitialising);

	return (0);
}

static int
blkfront_suspend(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = BLKIF_STATE_SUSPENDED;
	mtx_unlock(&sc->xb_io_lock);

	return (0);
}

static int
blkfront_resume(device_t dev)
{
#if 0
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	/* XXX This can't work!!! */
	blkif_free(sc, 1);
	blkfront_initialize(sc);
	if (sc->connected == BLKIF_STATE_SUSPENDED)
		blkif_recover(sc);
#endif
	return (0);
}
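/*
 * Negotiate transfer parameters with the backend and publish the shared
 * ring.  The defaults below apply whenever the backend does not advertise
 * a "max-*" node; any value it does advertise is clamped to the front-end
 * limits (XBF_MAX_*) before the corresponding front-end node is written
 * back to the XenStore.
 */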
static void
blkfront_initialize(struct xb_softc *sc)
{
	const char *otherend_path;
	const char *node_path;
	int error;
	int i;

	if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising)
		return;

	/*
	 * Protocol defaults valid even if negotiation for a
	 * setting fails.
	 */
	sc->ring_pages = 1;
	sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
	sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
	sc->max_request_size = (sc->max_request_segments - 1) * PAGE_SIZE;
	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

	/*
	 * Protocol negotiation.
	 *
	 * \note xs_gather() returns on the first encountered error, so
	 *       we must use independent calls in order to guarantee
	 *       we don't miss information in a sparsely populated back-end
	 *       tree.
	 */
	otherend_path = xenbus_get_otherend_path(sc->xb_dev);
	node_path = xenbus_get_node(sc->xb_dev);
	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-ring-pages", NULL, "%" PRIu32,
	    &sc->ring_pages);

	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-requests", NULL, "%" PRIu32,
	    &sc->max_requests);

	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-request-segments", NULL, "%" PRIu32,
	    &sc->max_request_segments);

	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-request-size", NULL, "%" PRIu32,
	    &sc->max_request_size);

	if (sc->ring_pages > XBF_MAX_RING_PAGES) {
		device_printf(sc->xb_dev, "Back-end specified ring-pages of "
		    "%u limited to front-end limit of %zu.\n",
		    sc->ring_pages, XBF_MAX_RING_PAGES);
		sc->ring_pages = XBF_MAX_RING_PAGES;
	}

	if (sc->max_requests > XBF_MAX_REQUESTS) {
		device_printf(sc->xb_dev, "Back-end specified max_requests of "
		    "%u limited to front-end limit of %u.\n",
		    sc->max_requests, XBF_MAX_REQUESTS);
		sc->max_requests = XBF_MAX_REQUESTS;
	}

	if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
		device_printf(sc->xb_dev, "Back-end specified "
		    "max_request_segments of %u limited to "
		    "front-end limit of %u.\n",
		    sc->max_request_segments,
		    XBF_MAX_SEGMENTS_PER_REQUEST);
		sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
	}

	if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
		device_printf(sc->xb_dev, "Back-end specified "
		    "max_request_size of %u limited to front-end "
		    "limit of %u.\n", sc->max_request_size,
		    XBF_MAX_REQUEST_SIZE);
		sc->max_request_size = XBF_MAX_REQUEST_SIZE;
	}
	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

	/* Allocate datastructures based on negotiated values. */
	error = bus_dma_tag_create(NULL,	/* parent */
	    512, PAGE_SIZE,			/* algnmnt, boundary */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    sc->max_request_size,
	    sc->max_request_segments,
	    PAGE_SIZE,				/* maxsegsize */
	    BUS_DMA_ALLOCNOW,			/* flags */
	    busdma_lock_mutex,			/* lockfunc */
	    &sc->xb_io_lock,			/* lockarg */
	    &sc->xb_io_dmat);
	if (error != 0) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "Cannot allocate parent DMA tag\n");
		return;
	}
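	/*
	 * The tag above limits every mapped I/O to at most
	 * max_request_segments segments of at most PAGE_SIZE bytes each,
	 * which is what a single (possibly multi-block) blkif request can
	 * describe.
	 */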
	/* Per-transaction data allocation. */
	sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
	    M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
	if (sc->shadow == NULL) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "Cannot allocate request structures\n");
	}

	for (i = 0; i < sc->max_requests; i++) {
		struct xb_command *cm;

		cm = &sc->shadow[i];
		cm->sg_refs = malloc(sizeof(grant_ref_t)
		    * sc->max_request_segments,
		    M_XENBLOCKFRONT, M_NOWAIT);
		if (cm->sg_refs == NULL)
			break;
		cm->id = i;
		cm->cm_sc = sc;
		if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
			break;
		xb_free_command(cm);
	}

	if (setup_blkring(sc) != 0)
		return;

	error = xs_printf(XST_NIL, node_path,
	    "ring-pages","%u", sc->ring_pages);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/ring-pages",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-requests","%u", sc->max_requests);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-requests",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-request-segments","%u", sc->max_request_segments);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-request-segments",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-request-size","%u", sc->max_request_size);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-request-size",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path, "event-channel",
	    "%u", irq_to_evtchn_port(sc->irq));
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/event-channel",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/protocol",
		    node_path);
		return;
	}

	xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
}

static int
setup_blkring(struct xb_softc *sc)
{
	blkif_sring_t *sring;
	uintptr_t sring_page_addr;
	int error;
	int i;

	sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
	    M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
		return (ENOMEM);
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);

	for (i = 0, sring_page_addr = (uintptr_t)sring;
	     i < sc->ring_pages;
	     i++, sring_page_addr += PAGE_SIZE) {

		error = xenbus_grant_ring(sc->xb_dev,
		    (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
		if (error) {
			xenbus_dev_fatal(sc->xb_dev, error,
			    "granting ring_ref(%d)", i);
			return (error);
		}
	}
	error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
	    "ring-ref","%u", sc->ring_ref[0]);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref",
		    xenbus_get_node(sc->xb_dev));
		return (error);
	}
	for (i = 1; i < sc->ring_pages; i++) {
		char ring_ref_name[]= "ring_refXX";

		snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i);
		error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
		    ring_ref_name, "%u", sc->ring_ref[i]);
		if (error) {
			xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
			    xenbus_get_node(sc->xb_dev),
			    ring_ref_name);
			return (error);
		}
	}
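	/*
	 * Allocate an event channel for completion notifications from the
	 * backend and wire blkif_int() up as its interrupt handler.  The
	 * local port is advertised to the backend through the
	 * "event-channel" node written in blkfront_initialize().
	 */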
	error = bind_listening_port_to_irqhandler(
	    xenbus_get_otherend_id(sc->xb_dev),
	    "xbd", (driver_intr_t *)blkif_int, sc,
	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "bind_evtchn_to_irqhandler failed");
		return (error);
	}

	return (0);
}

/**
 * Callback received when the backend's state changes.
 */
static int
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
	case XenbusStateClosed:
		break;

	case XenbusStateInitWait:
		blkfront_initialize(sc);
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		blkfront_initialize(sc);
		blkfront_connect(sc);
		break;

	case XenbusStateClosing:
		if (sc->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		break;
	}

	return (0);
}

/*
** Invoked when the backend is finally 'ready' (and has published the
** details about the physical device - #sectors, size, etc).
*/
static void
blkfront_connect(struct xb_softc *sc)
{
	device_t dev = sc->xb_dev;
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err, feature_barrier;

	if( (sc->connected == BLKIF_STATE_CONNECTED) ||
	    (sc->connected == BLKIF_STATE_SUSPENDED) )
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
	    "sectors", "%lu", &sectors,
	    "info", "%u", &binfo,
	    "sector-size", "%lu", &sector_size,
	    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
	    "feature-barrier", "%lu", &feature_barrier,
	    NULL);
	if (!err || feature_barrier)
		sc->xb_flags |= XB_BARRIER;

	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t) sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);

	xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = BLKIF_STATE_CONNECTED;
	xb_startio(sc);
	sc->xb_flags |= XB_READY;
	mtx_unlock(&sc->xb_io_lock);

}
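/*
 * Tear-down is a two-step handshake: blkfront_backend_changed() refuses
 * to close while sc->users is non-zero, and the final blkif_close() then
 * notices the backend sitting in XenbusStateClosing and completes the
 * shutdown through blkfront_closing().
 */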
/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	xenbus_set_state(dev, XenbusStateClosing);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	if (sc->xb_disk != NULL) {
		disk_destroy(sc->xb_disk);
		sc->xb_disk = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}


static int
blkfront_detach(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

	blkif_free(sc, 0);
	mtx_destroy(&sc->xb_io_lock);

	return 0;
}


static inline void
flush_requests(struct xb_softc *sc)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);

	if (notify)
		notify_remote_via_irq(sc->irq);
}

static void
blkif_restart_queue_callback(void *arg)
{
	struct xb_softc *sc = arg;

	mtx_lock(&sc->xb_io_lock);

	xb_startio(sc);

	mtx_unlock(&sc->xb_io_lock);
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		/* Do not dereference sc here; it is NULL. */
		printf("xb: disk not found\n");
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		device_t dev = sc->xb_dev;
		XenbusState state =
		    xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}

static void
xb_free_command(struct xb_command *cm)
{

	KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
	    ("Freeing command that is still on a queue\n"));

	cm->cm_flags = 0;
	cm->bp = NULL;
	cm->cm_complete = NULL;
	xb_enqueue_free(cm);
}
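/*
 * Command life cycle: a command is taken from the free queue, paired
 * with a bio and grant references in xb_bio_command() (or queued on the
 * ready queue by xb_dump()), placed on the shared ring and moved to the
 * busy queue by xb_startio()/blkif_queue_cb(), and returned to the free
 * queue once blkif_int() has processed the backend's response.
 */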
/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *         virtual address in the guest os.
 */
static struct xb_command *
xb_bio_command(struct xb_softc *sc)
{
	struct xb_command *cm;
	struct bio *bp;

	if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
		return (NULL);

	bp = xb_dequeue_bio(sc);
	if (bp == NULL)
		return (NULL);

	if ((cm = xb_dequeue_free(sc)) == NULL) {
		xb_requeue_bio(sc, bp);
		return (NULL);
	}

	if (gnttab_alloc_grant_references(sc->max_request_segments,
	    &cm->gref_head) != 0) {
		gnttab_request_free_callback(&sc->callback,
		    blkif_restart_queue_callback, sc,
		    sc->max_request_segments);
		xb_requeue_bio(sc, bp);
		xb_enqueue_free(cm);
		sc->xb_flags |= XB_FROZEN;
		return (NULL);
	}

	cm->bp = bp;
	cm->data = bp->bio_data;
	cm->datalen = bp->bio_bcount;
	cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
	    BLKIF_OP_WRITE;
	cm->sector_number = (blkif_sector_t)bp->bio_pblkno;

	return (cm);
}

static int
blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
{
	int error;

	error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
	    blkif_queue_cb, cm, 0);
	if (error == EINPROGRESS) {
		printf("EINPROGRESS\n");
		sc->xb_flags |= XB_FROZEN;
		cm->cm_flags |= XB_CMD_FROZEN;
		return (0);
	}

	return (error);
}

static void
blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	struct xb_softc *sc;
	struct xb_command *cm;
	blkif_request_t	*ring_req;
	struct blkif_request_segment *sg;
	struct blkif_request_segment *last_block_sg;
	grant_ref_t *sg_ref;
	vm_paddr_t buffer_ma;
	uint64_t fsect, lsect;
	int ref;
	int op;
	int block_segs;

	cm = arg;
	sc = cm->cm_sc;

	//printf("%s: Start\n", __func__);
	if (error) {
		printf("error %d in blkif_queue_cb\n", error);
		cm->bp->bio_error = EIO;
		biodone(cm->bp);
		xb_free_command(cm);
		return;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
	sc->ring.req_prod_pvt++;
	ring_req->id = cm->id;
	ring_req->operation = cm->operation;
	ring_req->sector_number = cm->sector_number;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
	ring_req->nr_segments = nsegs;
	cm->nseg = nsegs;

	block_segs    = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
	sg            = ring_req->seg;
	last_block_sg = sg + block_segs;
	sg_ref        = cm->sg_refs;

	while (1) {

		while (sg < last_block_sg) {
			buffer_ma = segs->ds_addr;
			fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
			lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;

			KASSERT(lsect <= 7, ("XEN disk driver data cannot "
			    "cross a page boundary"));

			/* install a grant reference. */
			ref = gnttab_claim_grant_reference(&cm->gref_head);

			/*
			 * GNTTAB_LIST_END == 0xffffffff, but it is private
			 * to gnttab.c.
			 */
			KASSERT(ref != ~0, ("grant_reference failed"));

			gnttab_grant_foreign_access_ref(
			    ref,
			    xenbus_get_otherend_id(sc->xb_dev),
			    buffer_ma >> PAGE_SHIFT,
			    ring_req->operation == BLKIF_OP_WRITE);

			*sg_ref = ref;
			*sg = (struct blkif_request_segment) {
				.gref       = ref,
				.first_sect = fsect,
				.last_sect  = lsect };
			sg++;
			sg_ref++;
			segs++;
			nsegs--;
		}
		block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
		if (block_segs == 0)
			break;

		sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
		sc->ring.req_prod_pvt++;
		last_block_sg = sg + block_segs;
	}

	if (cm->operation == BLKIF_OP_READ)
		op = BUS_DMASYNC_PREREAD;
	else if (cm->operation == BLKIF_OP_WRITE)
		op = BUS_DMASYNC_PREWRITE;
	else
		op = 0;
	bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);

	gnttab_free_grant_references(cm->gref_head);

	xb_enqueue_busy(cm);

	/*
	 * This flag means that we're probably executing in the busdma swi
	 * instead of in the startio context, so an explicit flush is needed.
	 */
	if (cm->cm_flags & XB_CMD_FROZEN)
		flush_requests(sc);

	//printf("%s: Done\n", __func__);
	return;
}
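/*
 * A request whose segment list does not fit in the header block spills
 * into additional ring slots (BLKRING_GET_SG_REQUEST above), so a single
 * command may consume up to sc->max_request_blocks ring entries.  That is
 * why xb_startio() only issues a command when at least that many free
 * slots are available.
 */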
/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct xb_command *cm;
	int error, queued = 0;

	mtx_assert(&sc->xb_io_lock, MA_OWNED);

	while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
		if (sc->xb_flags & XB_FROZEN)
			break;

		cm = xb_dequeue_ready(sc);

		if (cm == NULL)
			cm = xb_bio_command(sc);

		if (cm == NULL)
			break;

		if ((error = blkif_queue_request(sc, cm)) != 0) {
			printf("blkif_queue_request returned %d\n", error);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = xsc;
	struct xb_command *cm;
	blkif_response_t *bret;
	RING_IDX i, rp;
	int op;

	mtx_lock(&sc->xb_io_lock);

	if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&sc->xb_io_lock);
		return;
	}

 again:
	rp = sc->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = sc->ring.rsp_cons; i != rp;) {
		bret = RING_GET_RESPONSE(&sc->ring, i);
		cm   = &sc->shadow[bret->id];

		xb_remove_busy(cm);
		i += blkif_completion(cm);

		if (cm->operation == BLKIF_OP_READ)
			op = BUS_DMASYNC_POSTREAD;
		else if (cm->operation == BLKIF_OP_WRITE)
			op = BUS_DMASYNC_POSTWRITE;
		else
			op = 0;
		bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
		bus_dmamap_unload(sc->xb_io_dmat, cm->map);

		/*
		 * If commands are completing then resources are probably
		 * being freed as well.  It's a cheap assumption even when
		 * wrong.
		 */
		sc->xb_flags &= ~XB_FROZEN;

		/*
		 * Directly call the i/o complete routine to save an
		 * indirection in the common case.
		 */
		cm->status = bret->status;
		if (cm->bp)
			xb_bio_complete(sc, cm);
		else if (cm->cm_complete)
			(cm->cm_complete)(cm);
		else
			xb_free_command(cm);
	}

	sc->ring.rsp_cons = i;

	if (i != sc->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		sc->ring.sring->rsp_event = i + 1;
	}

	xb_startio(sc);

	mtx_unlock(&sc->xb_io_lock);
}
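/*
 * Release everything associated with the device channel: revoke the ring
 * page grants, free the shadow command array along with each command's
 * segment grant-reference array and DMA map, and unbind the event channel
 * interrupt handler.
 */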
static void
blkif_free(struct xb_softc *sc, int suspend)
{
	uint8_t *sring_page_ptr;
	int i;

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = suspend ?
	    BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&sc->xb_io_lock);

	/* Free resources associated with old device channel. */
	if (sc->ring.sring != NULL) {
		sring_page_ptr = (uint8_t *)sc->ring.sring;
		for (i = 0; i < sc->ring_pages; i++) {
			if (sc->ring_ref[i] != GRANT_INVALID_REF) {
				gnttab_end_foreign_access_ref(sc->ring_ref[i]);
				sc->ring_ref[i] = GRANT_INVALID_REF;
			}
			sring_page_ptr += PAGE_SIZE;
		}
		free(sc->ring.sring, M_XENBLOCKFRONT);
		sc->ring.sring = NULL;
	}

	if (sc->shadow) {

		for (i = 0; i < sc->max_requests; i++) {
			struct xb_command *cm;

			cm = &sc->shadow[i];
			if (cm->sg_refs != NULL) {
				free(cm->sg_refs, M_XENBLOCKFRONT);
				cm->sg_refs = NULL;
			}

			bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
		}
		free(sc->shadow, M_XENBLOCKFRONT);
		sc->shadow = NULL;
	}

	if (sc->irq) {
		unbind_from_irqhandler(sc->irq);
		sc->irq = 0;
	}
}

static int
blkif_completion(struct xb_command *s)
{
	//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
	gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
	return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
}

#if 0
static void
blkif_recover(struct xb_softc *sc)
{
	/*
	 * XXX The whole concept of not quiescing and completing all i/o
	 * during suspend, and then hoping to recover and replay the
	 * resulting abandoned I/O during resume, is laughable.  At best,
	 * it invalidates the i/o ordering rules required by just about
	 * every filesystem, and at worst it'll corrupt data.  The code
	 * has been removed until further notice.
	 */
}
#endif

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		blkfront_probe),
	DEVMETHOD(device_attach,	blkfront_attach),
	DEVMETHOD(device_detach,	blkfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	blkfront_suspend),
	DEVMETHOD(device_resume,	blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),

	{ 0, 0 }
};

static driver_t blkfront_driver = {
	"xbd",
	blkfront_methods,
	sizeof(struct xb_softc),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);