/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Scott Long, Yahoo!
 * Copyright (c) 2009 Frank Suchomel, Citrix
 * Copyright (c) 2009 Doug F. Rabson, Citrix
 * Copyright (c) 2005 Kip Macy
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
#include <sys/bus_dma.h>

#include <machine/_inttypes.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

/* prototypes */
static void xb_free_command(struct xb_command *cm);
static void xb_startio(struct xb_softc *sc);
static void blkfront_connect(struct xb_softc *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
static void blkfront_initialize(struct xb_softc *);
static int blkif_completion(struct xb_command *);
static void blkif_free(struct xb_softc *);
static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);

static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");

#define GRANT_INVALID_REF 0

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
static void xb_strategy(struct bio *bp);

// In order to quiesce the device during kernel dumps, outstanding requests to
// DOM0 for disk reads/writes need to be accounted for.
static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number.  For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
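/*
 * Worked example of the mapping implemented below (illustrative values
 * only): a traditional Xen device ID of 0x0811 encodes Linux major 8,
 * minor 0x11; the "da" entry for major 8 (shift 4, base 0) therefore
 * yields da1.  An ID with bit 28 set uses the extended encoding, e.g.
 * (1 << 28) | (5 << 8) maps directly to xbd5.
 */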
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		/* Extended ID: the unit number is encoded directly. */
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}

int
xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
    int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
	int unit, error = 0;
	const char *name;

	blkfront_vdevice_to_unit(vdevice, &unit, &name);

	sc->xb_unit = unit;

	if (strcmp(name, "xbd"))
		device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = sc->xb_unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_dump = xb_dump;
	sc->xb_disk->d_name = name;
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	sc->xb_disk->d_mediasize = sectors * sector_size;
	sc->xb_disk->d_maxsize = sc->max_request_size;
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);

	return (error);
}

/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Finds the proper unit, places it on
 * the sort queue and kicks the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
		return;
	}

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&sc->xb_io_lock);

	xb_enqueue_bio(sc, bp);
	xb_startio(sc);

	mtx_unlock(&sc->xb_io_lock);
	return;
}

static void
xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
{
	struct bio *bp;

	bp = cm->bp;

	if (unlikely(cm->status != BLKIF_RSP_OKAY)) {
		disk_err(bp, "disk error", -1, 0);
		printf(" status: %x\n", cm->status);
		bp->bio_flags |= BIO_ERROR;
	}

	if (bp->bio_flags & BIO_ERROR)
		bp->bio_error = EIO;
	else
		bp->bio_resid = 0;

	xb_free_command(cm);
	biodone(bp);
}

// Quiesce the disk writes for a dump file before allowing the next buffer.
static void
xb_quiesce(struct xb_softc *sc)
{
	int mtd;

	// While there are outstanding requests
	while (!TAILQ_EMPTY(&sc->cm_busy)) {
		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
		if (mtd) {
			/* Received request completions, update queue. */
			blkif_int(sc);
		}
		if (!TAILQ_EMPTY(&sc->cm_busy)) {
			/*
			 * Still pending requests, wait for the disk i/o
			 * to complete.
			 */
			HYPERVISOR_yield();
		}
	}
}

/* Kernel dump function for a paravirtualized disk device */
static void
xb_dump_complete(struct xb_command *cm)
{

	xb_enqueue_complete(cm);
}

static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp = arg;
	struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
	struct xb_command *cm;
	size_t chunk;
	int sbp;
	int rc = 0;

	if (length <= 0)
		return (rc);

	xb_quiesce(sc);	/* All quiet on the western front. */

	/*
	 * If this lock is held, then this module is failing, and a
	 * successful kernel dump is highly unlikely anyway.
	 */
	mtx_lock(&sc->xb_io_lock);

	/* Split the 64KB block as needed */
	for (sbp = 0; length > 0; sbp++) {
		cm = xb_dequeue_free(sc);
		if (cm == NULL) {
			mtx_unlock(&sc->xb_io_lock);
			device_printf(sc->xb_dev, "dump: no more commands?\n");
			return (EBUSY);
		}

		if (gnttab_alloc_grant_references(sc->max_request_segments,
		    &cm->gref_head) != 0) {
			xb_free_command(cm);
			mtx_unlock(&sc->xb_io_lock);
			device_printf(sc->xb_dev, "no more grant allocs?\n");
			return (EBUSY);
		}

		chunk = length > sc->max_request_size
		    ? sc->max_request_size : length;
		cm->data = virtual;
		cm->datalen = chunk;
		cm->operation = BLKIF_OP_WRITE;
		cm->sector_number = offset / dp->d_sectorsize;
		cm->cm_complete = xb_dump_complete;

		xb_enqueue_ready(cm);

		length -= chunk;
		offset += chunk;
		virtual = (char *) virtual + chunk;
	}

	/* Tell DOM0 to do the I/O */
	xb_startio(sc);
	mtx_unlock(&sc->xb_io_lock);

	/* Poll for the completion. */
	xb_quiesce(sc);	/* All quiet on the eastern front */

	/* If there were any errors, bail out... */
	while ((cm = xb_dequeue_complete(sc)) != NULL) {
		if (cm->status != BLKIF_RSP_OKAY) {
			device_printf(sc->xb_dev,
			    "Dump I/O failed at sector %jd\n",
			    cm->sector_number);
			rc = EIO;
		}
		xb_free_command(cm);
	}

	return (rc);
}


static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}

/*
 * Attach hooks the device up to the stack: read our virtual-device ID
 * from XenStore, initialize the command queues, and then sit in
 * XenbusStateInitialising until the back-end publishes its protocol
 * capabilities.
 */
static int
blkfront_attach(device_t dev)
{
	struct xb_softc *sc;
	const char *name;
	int error;
	int vdevice;
	int i;
	int unit;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xs_scanf(XST_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (error) {
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		device_printf(dev, "Couldn't determine virtual device.\n");
		return (error);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	sc = device_get_softc(dev);
	mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
	xb_initq_free(sc);
	xb_initq_busy(sc);
	xb_initq_ready(sc);
	xb_initq_complete(sc);
	xb_initq_bio(sc);
	for (i = 0; i < XBF_MAX_RING_PAGES; i++)
		sc->ring_ref[i] = GRANT_INVALID_REF;

	sc->xb_dev = dev;
	sc->vdevice = vdevice;
	sc->connected = BLKIF_STATE_DISCONNECTED;

	/* Wait for backend device to publish its protocol capabilities. */
	xenbus_set_state(dev, XenbusStateInitialising);

	return (0);
}

static int
blkfront_suspend(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);
	int retval;
	int saved_state;

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&sc->xb_io_lock);
	saved_state = sc->connected;
	sc->connected = BLKIF_STATE_SUSPENDED;

	/* Wait for outstanding I/O to drain. */
	retval = 0;
	while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
		if (msleep(&sc->cm_busy, &sc->xb_io_lock,
		    PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
			retval = EBUSY;
			break;
		}
	}
	mtx_unlock(&sc->xb_io_lock);

	if (retval != 0)
		sc->connected = saved_state;

	return (retval);
}

static int
blkfront_resume(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	blkif_free(sc);
	blkfront_initialize(sc);
	return (0);
}

static void
blkfront_initialize(struct xb_softc *sc)
{
	const char *otherend_path;
	const char *node_path;
	int error;
	int i;

	if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
		/* Initialization has already been performed. */
		return;
	}

	/*
	 * Protocol defaults valid even if negotiation for a
	 * setting fails.
	 */
	sc->ring_pages = 1;
	sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
	sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
	sc->max_request_size = (sc->max_request_segments - 1) * PAGE_SIZE;
	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

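	/*
	 * What follows is, in outline, a two-way capability exchange with
	 * the back-end over XenStore: read the back-end's advertised max-*
	 * limits, clamp each one to this front-end's compile-time XBF_MAX_*
	 * limits, allocate the ring and per-request resources sized
	 * accordingly, and finally publish our chosen values (plus
	 * event-channel and protocol) before moving to
	 * XenbusStateInitialised.
	 */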
	/*
	 * Protocol negotiation.
	 *
	 * \note xs_gather() returns on the first encountered error, so
	 *       we must use independent calls in order to guarantee
	 *       we don't miss information in a sparsely populated back-end
	 *       tree.
	 */
	otherend_path = xenbus_get_otherend_path(sc->xb_dev);
	node_path = xenbus_get_node(sc->xb_dev);
	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-ring-pages", NULL, "%" PRIu32,
	    &sc->ring_pages);

	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-requests", NULL, "%" PRIu32,
	    &sc->max_requests);

	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-request-segments", NULL, "%" PRIu32,
	    &sc->max_request_segments);

	(void)xs_scanf(XST_NIL, otherend_path,
	    "max-request-size", NULL, "%" PRIu32,
	    &sc->max_request_size);

	if (sc->ring_pages > XBF_MAX_RING_PAGES) {
		device_printf(sc->xb_dev, "Back-end specified ring-pages of "
		    "%u limited to front-end limit of %zu.\n",
		    sc->ring_pages, XBF_MAX_RING_PAGES);
		sc->ring_pages = XBF_MAX_RING_PAGES;
	}

	if (sc->max_requests > XBF_MAX_REQUESTS) {
		device_printf(sc->xb_dev, "Back-end specified max_requests of "
		    "%u limited to front-end limit of %u.\n",
		    sc->max_requests, XBF_MAX_REQUESTS);
		sc->max_requests = XBF_MAX_REQUESTS;
	}

	if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) {
		device_printf(sc->xb_dev, "Back-end specified "
		    "max_request_segments of %u limited to "
		    "front-end limit of %u.\n",
		    sc->max_request_segments,
		    XBF_MAX_SEGMENTS_PER_REQUEST);
		sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST;
	}

	if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) {
		device_printf(sc->xb_dev, "Back-end specified "
		    "max_request_size of %u limited to front-end "
		    "limit of %u.\n", sc->max_request_size,
		    XBF_MAX_REQUEST_SIZE);
		sc->max_request_size = XBF_MAX_REQUEST_SIZE;
	}
	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

	/* Allocate data structures based on negotiated values. */
	error = bus_dma_tag_create(NULL,		/* parent */
	    512, PAGE_SIZE,				/* algnmnt, boundary */
	    BUS_SPACE_MAXADDR,				/* lowaddr */
	    BUS_SPACE_MAXADDR,				/* highaddr */
	    NULL, NULL,					/* filter, filterarg */
	    sc->max_request_size,
	    sc->max_request_segments,
	    PAGE_SIZE,					/* maxsegsize */
	    BUS_DMA_ALLOCNOW,				/* flags */
	    busdma_lock_mutex,				/* lockfunc */
	    &sc->xb_io_lock,				/* lockarg */
	    &sc->xb_io_dmat);
	if (error != 0) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "Cannot allocate parent DMA tag\n");
		return;
	}

	/* Per-transaction data allocation. */
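	/*
	 * Each entry in the shadow array is one xb_command: it carries the
	 * ring ID used to match responses, a per-request array of grant
	 * references (sg_refs), and a busdma map for the data buffer.
	 * Commands are recycled through the free queue by xb_free_command().
	 */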
	sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
	    M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
	if (sc->shadow == NULL) {
		bus_dma_tag_destroy(sc->xb_io_dmat);
		xenbus_dev_fatal(sc->xb_dev, ENOMEM,
		    "Cannot allocate request structures\n");
		return;
	}

	for (i = 0; i < sc->max_requests; i++) {
		struct xb_command *cm;

		cm = &sc->shadow[i];
		cm->sg_refs = malloc(sizeof(grant_ref_t)
		    * sc->max_request_segments,
		    M_XENBLOCKFRONT, M_NOWAIT);
		if (cm->sg_refs == NULL)
			break;
		cm->id = i;
		cm->cm_sc = sc;
		if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
			break;
		xb_free_command(cm);
	}

	if (setup_blkring(sc) != 0)
		return;

	error = xs_printf(XST_NIL, node_path,
	    "ring-pages", "%u", sc->ring_pages);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/ring-pages",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-requests", "%u", sc->max_requests);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-requests",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-request-segments", "%u", sc->max_request_segments);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-request-segments",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-request-size", "%u", sc->max_request_size);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-request-size",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path, "event-channel",
	    "%u", irq_to_evtchn_port(sc->irq));
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/event-channel",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/protocol",
		    node_path);
		return;
	}

	xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
}

static int
setup_blkring(struct xb_softc *sc)
{
	blkif_sring_t *sring;
	uintptr_t sring_page_addr;
	int error;
	int i;

	sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
	    M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
		return (ENOMEM);
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);

	for (i = 0, sring_page_addr = (uintptr_t)sring;
	     i < sc->ring_pages;
	     i++, sring_page_addr += PAGE_SIZE) {

		error = xenbus_grant_ring(sc->xb_dev,
		    (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
		if (error) {
			xenbus_dev_fatal(sc->xb_dev, error,
			    "granting ring_ref(%d)", i);
			return (error);
		}
	}
	error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
	    "ring-ref", "%u", sc->ring_ref[0]);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref",
		    xenbus_get_node(sc->xb_dev));
		return (error);
	}
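	/*
	 * When a multi-page ring has been negotiated, the grant reference
	 * for each additional page is advertised to the back-end under its
	 * own node, ring-ref1 through ring-ref<N-1>.
	 */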
	for (i = 1; i < sc->ring_pages; i++) {
		char ring_ref_name[]= "ring_refXX";

		snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i);
		error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
		    ring_ref_name, "%u", sc->ring_ref[i]);
		if (error) {
			xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
			    xenbus_get_node(sc->xb_dev),
			    ring_ref_name);
			return (error);
		}
	}

	error = bind_listening_port_to_irqhandler(
	    xenbus_get_otherend_id(sc->xb_dev),
	    "xbd", (driver_intr_t *)blkif_int, sc,
	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "bind_evtchn_to_irqhandler failed");
		return (error);
	}

	return (0);
}

/**
 * Callback received when the backend's state changes.
 */
static void
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
	case XenbusStateClosed:
		break;

	case XenbusStateInitWait:
	case XenbusStateInitialised:
		blkfront_initialize(sc);
		break;

	case XenbusStateConnected:
		blkfront_initialize(sc);
		blkfront_connect(sc);
		break;

	case XenbusStateClosing:
		if (sc->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		break;
	}
}

/*
** Invoked when the backend is finally 'ready' (and has published
** the details about the physical device - #sectors, size, etc).
*/
static void
blkfront_connect(struct xb_softc *sc)
{
	device_t dev = sc->xb_dev;
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err, feature_barrier;

	if ((sc->connected == BLKIF_STATE_CONNECTED) ||
	    (sc->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
	    "sectors", "%lu", &sectors,
	    "info", "%u", &binfo,
	    "sector-size", "%lu", &sector_size,
	    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
	    "feature-barrier", "%d", &feature_barrier,
	    NULL);
	if (err == 0 && feature_barrier != 0)
		sc->xb_flags |= XB_BARRIER;

	if (sc->xb_disk == NULL) {
		device_printf(dev, "%juMB <%s> at %s",
		    (uintmax_t) sectors / (1048576 / sector_size),
		    device_get_desc(dev),
		    xenbus_get_node(dev));
		bus_print_child_footer(device_get_parent(dev), dev);

		xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
	}

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = BLKIF_STATE_CONNECTED;
	xb_startio(sc);
	sc->xb_flags |= XB_READY;
	mtx_unlock(&sc->xb_io_lock);
}

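/*
 * For reference, the back-end nodes read by blkfront_connect() typically
 * live under a path of the form
 * /local/domain/<backend-domid>/backend/vbd/<frontend-domid>/<devid>
 * (the exact layout depends on the toolstack).  Hypothetical example
 * values for a 10GB disk:
 *
 *	sectors         = "20971520"
 *	info            = "0"
 *	sector-size     = "512"
 *	feature-barrier = "1"
 *
 * The media size reported to GEOM is sectors * sector-size.
 */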
/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void
blkfront_closing(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	xenbus_set_state(dev, XenbusStateClosing);

	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));

	if (sc->xb_disk != NULL) {
		disk_destroy(sc->xb_disk);
		sc->xb_disk = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}


static int
blkfront_detach(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));

	blkif_free(sc);
	mtx_destroy(&sc->xb_io_lock);

	return (0);
}


static inline void
flush_requests(struct xb_softc *sc)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);

	if (notify)
		notify_remote_via_irq(sc->irq);
}

static void
blkif_restart_queue_callback(void *arg)
{
	struct xb_softc *sc = arg;

	mtx_lock(&sc->xb_io_lock);

	xb_startio(sc);

	mtx_unlock(&sc->xb_io_lock);
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printf("xb%d: not found\n", dp->d_unit);
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		device_t dev = sc->xb_dev;
		XenbusState state =
		    xenbus_read_driver_state(xenbus_get_otherend_path(dev));

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}

static void
xb_free_command(struct xb_command *cm)
{

	KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
	    ("Freeing command that is still on a queue\n"));

	cm->cm_flags = 0;
	cm->bp = NULL;
	cm->cm_complete = NULL;
	xb_enqueue_free(cm);
}

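/*
 * Request path, in brief: xb_strategy() queues the bio; xb_startio() pairs
 * it with a free xb_command in xb_bio_command(); blkif_queue_request()
 * loads the busdma map, and blkif_queue_cb() builds the ring entries and
 * grants the data pages to the back-end; blkif_int() picks up the response,
 * syncs and unloads the map, and completes the bio or calls cm_complete.
 */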
/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static struct xb_command *
xb_bio_command(struct xb_softc *sc)
{
	struct xb_command *cm;
	struct bio *bp;

	if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
		return (NULL);

	bp = xb_dequeue_bio(sc);
	if (bp == NULL)
		return (NULL);

	if ((cm = xb_dequeue_free(sc)) == NULL) {
		xb_requeue_bio(sc, bp);
		return (NULL);
	}

	if (gnttab_alloc_grant_references(sc->max_request_segments,
	    &cm->gref_head) != 0) {
		gnttab_request_free_callback(&sc->callback,
		    blkif_restart_queue_callback, sc,
		    sc->max_request_segments);
		xb_requeue_bio(sc, bp);
		xb_enqueue_free(cm);
		sc->xb_flags |= XB_FROZEN;
		return (NULL);
	}

	cm->bp = bp;
	cm->data = bp->bio_data;
	cm->datalen = bp->bio_bcount;
	cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
	    BLKIF_OP_WRITE;
	cm->sector_number = (blkif_sector_t)bp->bio_pblkno;

	return (cm);
}

static int
blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
{
	int error;

	error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
	    blkif_queue_cb, cm, 0);
	if (error == EINPROGRESS) {
		printf("EINPROGRESS\n");
		sc->xb_flags |= XB_FROZEN;
		cm->cm_flags |= XB_CMD_FROZEN;
		return (0);
	}

	return (error);
}

static void
blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	struct xb_softc *sc;
	struct xb_command *cm;
	blkif_request_t *ring_req;
	struct blkif_request_segment *sg;
	struct blkif_request_segment *last_block_sg;
	grant_ref_t *sg_ref;
	vm_paddr_t buffer_ma;
	uint64_t fsect, lsect;
	int ref;
	int op;
	int block_segs;

	cm = arg;
	sc = cm->cm_sc;

	//printf("%s: Start\n", __func__);
	if (error) {
		printf("error %d in blkif_queue_cb\n", error);
		cm->bp->bio_error = EIO;
		biodone(cm->bp);
		xb_free_command(cm);
		return;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
	sc->ring.req_prod_pvt++;
	ring_req->id = cm->id;
	ring_req->operation = cm->operation;
	ring_req->sector_number = cm->sector_number;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
	ring_req->nr_segments = nsegs;
	cm->nseg = nsegs;

	block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
	sg = ring_req->seg;
	last_block_sg = sg + block_segs;
	sg_ref = cm->sg_refs;

	while (1) {

		while (sg < last_block_sg) {
			buffer_ma = segs->ds_addr;
			fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
			lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;

			KASSERT(lsect <= 7, ("XEN disk driver data cannot "
			    "cross a page boundary"));

			/* install a grant reference. */
			ref = gnttab_claim_grant_reference(&cm->gref_head);

			/*
			 * GNTTAB_LIST_END == 0xffffffff, but it is private
			 * to gnttab.c.
			 */
			KASSERT(ref != ~0, ("grant_reference failed"));

			gnttab_grant_foreign_access_ref(
			    ref,
			    xenbus_get_otherend_id(sc->xb_dev),
			    buffer_ma >> PAGE_SHIFT,
			    ring_req->operation == BLKIF_OP_WRITE);

			*sg_ref = ref;
			*sg = (struct blkif_request_segment) {
				.gref       = ref,
				.first_sect = fsect,
				.last_sect  = lsect };
			sg++;
			sg_ref++;
			segs++;
			nsegs--;
		}
		block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
		if (block_segs == 0)
			break;

		sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
		sc->ring.req_prod_pvt++;
		last_block_sg = sg + block_segs;
	}

	if (cm->operation == BLKIF_OP_READ)
		op = BUS_DMASYNC_PREREAD;
	else if (cm->operation == BLKIF_OP_WRITE)
		op = BUS_DMASYNC_PREWRITE;
	else
		op = 0;
	bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);

	gnttab_free_grant_references(cm->gref_head);

	xb_enqueue_busy(cm);

	/*
	 * This flag means that we're probably executing in the busdma swi
	 * instead of in the startio context, so an explicit flush is needed.
	 */
	if (cm->cm_flags & XB_CMD_FROZEN)
		flush_requests(sc);

	//printf("%s: Done\n", __func__);
	return;
}

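/*
 * A single command may span more than one ring slot: the header block
 * holds up to BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK segments, and any
 * remaining segments spill into additional segment blocks.  This is why
 * xb_startio() below only queues a command while at least
 * max_request_blocks ring slots are free.
 */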
/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct xb_command *cm;
	int error, queued = 0;

	mtx_assert(&sc->xb_io_lock, MA_OWNED);

	if (sc->connected != BLKIF_STATE_CONNECTED)
		return;

	while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
		if (sc->xb_flags & XB_FROZEN)
			break;

		cm = xb_dequeue_ready(sc);

		if (cm == NULL)
			cm = xb_bio_command(sc);

		if (cm == NULL)
			break;

		if ((error = blkif_queue_request(sc, cm)) != 0) {
			printf("blkif_queue_request returned %d\n", error);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = xsc;
	struct xb_command *cm;
	blkif_response_t *bret;
	RING_IDX i, rp;
	int op;

	mtx_lock(&sc->xb_io_lock);

	if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
		mtx_unlock(&sc->xb_io_lock);
		return;
	}

 again:
	rp = sc->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = sc->ring.rsp_cons; i != rp;) {
		bret = RING_GET_RESPONSE(&sc->ring, i);
		cm = &sc->shadow[bret->id];

		xb_remove_busy(cm);
		i += blkif_completion(cm);

		if (cm->operation == BLKIF_OP_READ)
			op = BUS_DMASYNC_POSTREAD;
		else if (cm->operation == BLKIF_OP_WRITE)
			op = BUS_DMASYNC_POSTWRITE;
		else
			op = 0;
		bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
		bus_dmamap_unload(sc->xb_io_dmat, cm->map);

		/*
		 * If commands are completing then resources are probably
		 * being freed as well.  It's a cheap assumption even when
		 * wrong.
		 */
		sc->xb_flags &= ~XB_FROZEN;

		/*
		 * Directly call the i/o complete routine to save an
		 * indirection in the common case.
		 */
		cm->status = bret->status;
		if (cm->bp)
			xb_bio_complete(sc, cm);
		else if (cm->cm_complete)
			(cm->cm_complete)(cm);
		else
			xb_free_command(cm);
	}

	sc->ring.rsp_cons = i;

	if (i != sc->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		sc->ring.sring->rsp_event = i + 1;
	}

	xb_startio(sc);

	if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
		wakeup(&sc->cm_busy);

	mtx_unlock(&sc->xb_io_lock);
}

static void
blkif_free(struct xb_softc *sc)
{
	uint8_t *sring_page_ptr;
	int i;

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&sc->xb_io_lock);

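	/*
	 * Once connected is set to BLKIF_STATE_DISCONNECTED, xb_startio()
	 * and blkif_int() return without touching the ring, so the grants,
	 * the shared ring, the per-command resources, and the interrupt
	 * binding can be torn down safely below.
	 */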
	/* Free resources associated with old device channel. */
	if (sc->ring.sring != NULL) {
		sring_page_ptr = (uint8_t *)sc->ring.sring;
		for (i = 0; i < sc->ring_pages; i++) {
			if (sc->ring_ref[i] != GRANT_INVALID_REF) {
				gnttab_end_foreign_access_ref(sc->ring_ref[i]);
				sc->ring_ref[i] = GRANT_INVALID_REF;
			}
			sring_page_ptr += PAGE_SIZE;
		}
		free(sc->ring.sring, M_XENBLOCKFRONT);
		sc->ring.sring = NULL;
	}

	if (sc->shadow) {

		for (i = 0; i < sc->max_requests; i++) {
			struct xb_command *cm;

			cm = &sc->shadow[i];
			if (cm->sg_refs != NULL) {
				free(cm->sg_refs, M_XENBLOCKFRONT);
				cm->sg_refs = NULL;
			}

			bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
		}
		free(sc->shadow, M_XENBLOCKFRONT);
		sc->shadow = NULL;

		bus_dma_tag_destroy(sc->xb_io_dmat);

		xb_initq_free(sc);
		xb_initq_ready(sc);
		xb_initq_complete(sc);
	}

	if (sc->irq) {
		unbind_from_irqhandler(sc->irq);
		sc->irq = 0;
	}
}

static int
blkif_completion(struct xb_command *s)
{
	//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
	gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
	return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
}

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		blkfront_probe),
	DEVMETHOD(device_attach,	blkfront_attach),
	DEVMETHOD(device_detach,	blkfront_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	blkfront_suspend),
	DEVMETHOD(device_resume,	blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),

	{ 0, 0 }
};

static driver_t blkfront_driver = {
	"xbd",
	blkfront_methods,
	sizeof(struct xb_softc),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);