/*
 * XenBSD block device driver
 *
 * Copyright (c) 2009 Scott Long, Yahoo!
 * Copyright (c) 2009 Frank Suchomel, Citrix
 * Copyright (c) 2009 Doug F. Rabson, Citrix
 * Copyright (c) 2005 Kip Macy
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
#include <sys/bus_dma.h>

#include <machine/_inttypes.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>

#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>

#include <geom/geom_disk.h>

#include <dev/xen/blkfront/block.h>

#include "xenbus_if.h"

/* prototypes */
static void xb_free_command(struct xb_command *cm);
static void xb_startio(struct xb_softc *sc);
static void blkfront_connect(struct xb_softc *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
static void blkfront_initialize(struct xb_softc *);
#if 0
static void blkif_recover(struct xb_softc *);
#endif
static int blkif_completion(struct xb_command *);
static void blkif_free(struct xb_softc *, int);
static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);

MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");

#define GRANT_INVALID_REF 0

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char * blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif

#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
static void xb_strategy(struct bio *bp);

// In order to quiesce the device during kernel dumps, outstanding requests to
// DOM0 for disk reads/writes need to be accounted for.
static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);

/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
 */
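/*
 * Worked example of the table below: vdevice 0x800 is Linux major 8,
 * minor 0, which maps to da0; vdevice (202 << 8) | 16 uses the Xen
 * "xbd" major and maps to xbd1 (unit = minor >> 4).
 */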
142 */ 143 static void 144 blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name) 145 { 146 static struct vdev_info { 147 int major; 148 int shift; 149 int base; 150 const char *name; 151 } info[] = { 152 {3, 6, 0, "ad"}, /* ide0 */ 153 {22, 6, 2, "ad"}, /* ide1 */ 154 {33, 6, 4, "ad"}, /* ide2 */ 155 {34, 6, 6, "ad"}, /* ide3 */ 156 {56, 6, 8, "ad"}, /* ide4 */ 157 {57, 6, 10, "ad"}, /* ide5 */ 158 {88, 6, 12, "ad"}, /* ide6 */ 159 {89, 6, 14, "ad"}, /* ide7 */ 160 {90, 6, 16, "ad"}, /* ide8 */ 161 {91, 6, 18, "ad"}, /* ide9 */ 162 163 {8, 4, 0, "da"}, /* scsi disk0 */ 164 {65, 4, 16, "da"}, /* scsi disk1 */ 165 {66, 4, 32, "da"}, /* scsi disk2 */ 166 {67, 4, 48, "da"}, /* scsi disk3 */ 167 {68, 4, 64, "da"}, /* scsi disk4 */ 168 {69, 4, 80, "da"}, /* scsi disk5 */ 169 {70, 4, 96, "da"}, /* scsi disk6 */ 170 {71, 4, 112, "da"}, /* scsi disk7 */ 171 {128, 4, 128, "da"}, /* scsi disk8 */ 172 {129, 4, 144, "da"}, /* scsi disk9 */ 173 {130, 4, 160, "da"}, /* scsi disk10 */ 174 {131, 4, 176, "da"}, /* scsi disk11 */ 175 {132, 4, 192, "da"}, /* scsi disk12 */ 176 {133, 4, 208, "da"}, /* scsi disk13 */ 177 {134, 4, 224, "da"}, /* scsi disk14 */ 178 {135, 4, 240, "da"}, /* scsi disk15 */ 179 180 {202, 4, 0, "xbd"}, /* xbd */ 181 182 {0, 0, 0, NULL}, 183 }; 184 int major = vdevice >> 8; 185 int minor = vdevice & 0xff; 186 int i; 187 188 if (vdevice & (1 << 28)) { 189 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 190 *name = "xbd"; 191 } 192 193 for (i = 0; info[i].major; i++) { 194 if (info[i].major == major) { 195 *unit = info[i].base + (minor >> info[i].shift); 196 *name = info[i].name; 197 return; 198 } 199 } 200 201 *unit = minor >> 4; 202 *name = "xbd"; 203 } 204 205 int 206 xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors, 207 int vdevice, uint16_t vdisk_info, unsigned long sector_size) 208 { 209 int unit, error = 0; 210 const char *name; 211 212 blkfront_vdevice_to_unit(vdevice, &unit, &name); 213 214 sc->xb_unit = unit; 215 216 if (strcmp(name, "xbd")) 217 device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit); 218 219 sc->xb_disk = disk_alloc(); 220 sc->xb_disk->d_unit = sc->xb_unit; 221 sc->xb_disk->d_open = blkif_open; 222 sc->xb_disk->d_close = blkif_close; 223 sc->xb_disk->d_ioctl = blkif_ioctl; 224 sc->xb_disk->d_strategy = xb_strategy; 225 sc->xb_disk->d_dump = xb_dump; 226 sc->xb_disk->d_name = name; 227 sc->xb_disk->d_drv1 = sc; 228 sc->xb_disk->d_sectorsize = sector_size; 229 230 sc->xb_disk->d_mediasize = sectors * sector_size; 231 sc->xb_disk->d_maxsize = sc->max_request_size; 232 sc->xb_disk->d_flags = 0; 233 disk_create(sc->xb_disk, DISK_VERSION_00); 234 235 return error; 236 } 237 238 /************************ end VBD support *****************/ 239 240 /* 241 * Read/write routine for a buffer. Finds the proper unit, place it on 242 * the sortq and kick the controller. 243 */ 244 static void 245 xb_strategy(struct bio *bp) 246 { 247 struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; 248 249 /* bogus disk? 
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
		return;
	}

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&sc->xb_io_lock);

	xb_enqueue_bio(sc, bp);
	xb_startio(sc);

	mtx_unlock(&sc->xb_io_lock);
	return;
}

static void
xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
{
	struct bio *bp;

	bp = cm->bp;

	if (unlikely(cm->status != BLKIF_RSP_OKAY)) {
		disk_err(bp, "disk error", -1, 0);
		printf(" status: %x\n", cm->status);
		bp->bio_flags |= BIO_ERROR;
	}

	if (bp->bio_flags & BIO_ERROR)
		bp->bio_error = EIO;
	else
		bp->bio_resid = 0;

	xb_free_command(cm);
	biodone(bp);
}

// Quiesce the disk writes for a dump file before allowing the next buffer.
static void
xb_quiesce(struct xb_softc *sc)
{
	int mtd;

	// While there are outstanding requests
	while (!TAILQ_EMPTY(&sc->cm_busy)) {
		RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
		if (mtd) {
			/* Received request completions, update queue. */
			blkif_int(sc);
		}
		if (!TAILQ_EMPTY(&sc->cm_busy)) {
			/*
			 * Still pending requests, wait for the disk i/o
			 * to complete.
			 */
			HYPERVISOR_yield();
		}
	}
}

/* Kernel dump function for a paravirtualized disk device */
static void
xb_dump_complete(struct xb_command *cm)
{

	xb_enqueue_complete(cm);
}

static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp = arg;
	struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
	struct xb_command *cm;
	size_t chunk;
	int sbp;
	int rc = 0;

	if (length <= 0)
		return (rc);

	xb_quiesce(sc);	/* All quiet on the western front. */

	/*
	 * If this lock is held, then this module is failing, and a
	 * successful kernel dump is highly unlikely anyway.
	 */
	mtx_lock(&sc->xb_io_lock);

	/* Split the 64KB block as needed */
	for (sbp = 0; length > 0; sbp++) {
		cm = xb_dequeue_free(sc);
		if (cm == NULL) {
			mtx_unlock(&sc->xb_io_lock);
			device_printf(sc->xb_dev, "dump: no more commands?\n");
			return (EBUSY);
		}

		if (gnttab_alloc_grant_references(sc->max_request_segments,
		    &cm->gref_head) != 0) {
			xb_free_command(cm);
			mtx_unlock(&sc->xb_io_lock);
			device_printf(sc->xb_dev, "no more grant allocs?\n");
			return (EBUSY);
		}

		chunk = length > sc->max_request_size
		      ? sc->max_request_size : length;
		cm->data = virtual;
		cm->datalen = chunk;
		cm->operation = BLKIF_OP_WRITE;
		cm->sector_number = offset / dp->d_sectorsize;
		cm->cm_complete = xb_dump_complete;

		xb_enqueue_ready(cm);

		length -= chunk;
		offset += chunk;
		virtual = (char *) virtual + chunk;
	}

	/* Tell DOM0 to do the I/O */
	xb_startio(sc);
	mtx_unlock(&sc->xb_io_lock);

	/* Poll for the completion. */
	xb_quiesce(sc);	/* All quiet on the eastern front */

	/* If there were any errors, bail out... */
	while ((cm = xb_dequeue_complete(sc)) != NULL) {
		if (cm->status != BLKIF_RSP_OKAY) {
			device_printf(sc->xb_dev,
			    "Dump I/O failed at sector %jd\n",
			    cm->sector_number);
			rc = EIO;
		}
		xb_free_command(cm);
	}

	return (rc);
}


static int
blkfront_probe(device_t dev)
{

	if (!strcmp(xenbus_get_type(dev), "vbd")) {
		device_set_desc(dev, "Virtual Block Device");
		device_quiet(dev);
		return (0);
	}

	return (ENXIO);
}

/*
 * Attach is handed the backend directory and virtual device number.  We
 * set up the per-device queues and lock here, then wait for the backend
 * to publish its protocol capabilities; the event channel and shared
 * ring are allocated later, in blkfront_initialize().
 */
static int
blkfront_attach(device_t dev)
{
	struct xb_softc *sc;
	const char *name;
	int error;
	int vdevice;
	int i;
	int unit;

	/* FIXME: Use dynamic device id if this is not set. */
	error = xs_scanf(XST_NIL, xenbus_get_node(dev),
	    "virtual-device", NULL, "%i", &vdevice);
	if (error) {
		xenbus_dev_fatal(dev, error, "reading virtual-device");
		device_printf(dev, "Couldn't determine virtual device.\n");
		return (error);
	}

	blkfront_vdevice_to_unit(vdevice, &unit, &name);
	if (!strcmp(name, "xbd"))
		device_set_unit(dev, unit);

	sc = device_get_softc(dev);
	mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
	xb_initq_free(sc);
	xb_initq_busy(sc);
	xb_initq_ready(sc);
	xb_initq_complete(sc);
	xb_initq_bio(sc);
	for (i = 0; i < XBF_MAX_RING_PAGES; i++)
		sc->ring_ref[i] = GRANT_INVALID_REF;

	sc->xb_dev = dev;
	sc->vdevice = vdevice;
	sc->connected = BLKIF_STATE_DISCONNECTED;

	/* Front end dir is a number, which is used as the id. */
	sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);

	/* Wait for backend device to publish its protocol capabilities. */
	xenbus_set_state(dev, XenbusStateInitialising);

	return (0);
}

static int
blkfront_suspend(device_t dev)
{
	struct xb_softc *sc = device_get_softc(dev);

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = BLKIF_STATE_SUSPENDED;
	mtx_unlock(&sc->xb_io_lock);

	return (0);
}

static int
blkfront_resume(device_t dev)
{
#if 0
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));

	/* XXX This can't work!!! */
	blkif_free(sc, 1);
	blkfront_initialize(sc);
	if (sc->connected == BLKIF_STATE_SUSPENDED)
		blkif_recover(sc);
#endif
	return (0);
}

static void
blkfront_initialize(struct xb_softc *sc)
{
	const char *otherend_path;
	const char *node_path;
	int error;
	int i;

	if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising)
		return;

	/*
	 * Protocol defaults valid even if negotiation for a
	 * setting fails.
	 */
	sc->ring_pages = 1;
	sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
	sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
	sc->max_request_size = sc->max_request_segments * PAGE_SIZE;
	sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

	/*
	 * Protocol negotiation.
	 *
	 * \note xs_gather() returns on the first encountered error, so
	 *       we must use independent calls in order to guarantee
	 *       we don't miss information in a sparsely populated back-end
	 *       tree.
	 */
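	/*
	 * Each xs_scanf() below intentionally ignores failure: if the
	 * back-end does not publish a particular node, the default chosen
	 * above remains in effect.
	 */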
521 */ 522 otherend_path = xenbus_get_otherend_path(sc->xb_dev); 523 node_path = xenbus_get_node(sc->xb_dev); 524 (void)xs_scanf(XST_NIL, otherend_path, 525 "max-ring-pages", NULL, "%" PRIu32, 526 &sc->ring_pages); 527 528 (void)xs_scanf(XST_NIL, otherend_path, 529 "max-requests", NULL, "%" PRIu32, 530 &sc->max_requests); 531 532 (void)xs_scanf(XST_NIL, otherend_path, 533 "max-request-segments", NULL, "%" PRIu32, 534 &sc->max_request_segments); 535 536 (void)xs_scanf(XST_NIL, otherend_path, 537 "max-request-size", NULL, "%" PRIu32, 538 &sc->max_request_size); 539 540 if (sc->ring_pages > XBF_MAX_RING_PAGES) { 541 device_printf(sc->xb_dev, "Back-end specified ring-pages of " 542 "%u limited to front-end limit of %zu.\n", 543 sc->ring_pages, XBF_MAX_RING_PAGES); 544 sc->ring_pages = XBF_MAX_RING_PAGES; 545 } 546 547 if (sc->max_requests > XBF_MAX_REQUESTS) { 548 device_printf(sc->xb_dev, "Back-end specified max_requests of " 549 "%u limited to front-end limit of %u.\n", 550 sc->max_requests, XBF_MAX_REQUESTS); 551 sc->max_requests = XBF_MAX_REQUESTS; 552 } 553 554 if (sc->max_request_segments > XBF_MAX_SEGMENTS_PER_REQUEST) { 555 device_printf(sc->xb_dev, "Back-end specificed " 556 "max_requests_segments of %u limited to " 557 "front-end limit of %u.\n", 558 sc->max_request_segments, 559 XBF_MAX_SEGMENTS_PER_REQUEST); 560 sc->max_request_segments = XBF_MAX_SEGMENTS_PER_REQUEST; 561 } 562 563 if (sc->max_request_size > XBF_MAX_REQUEST_SIZE) { 564 device_printf(sc->xb_dev, "Back-end specificed " 565 "max_request_size of %u limited to front-end " 566 "limit of %u.\n", sc->max_request_size, 567 XBF_MAX_REQUEST_SIZE); 568 sc->max_request_size = XBF_MAX_REQUEST_SIZE; 569 } 570 sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments); 571 572 /* Allocate datastructures based on negotiated values. */ 573 error = bus_dma_tag_create(NULL, /* parent */ 574 512, PAGE_SIZE, /* algnmnt, boundary */ 575 BUS_SPACE_MAXADDR, /* lowaddr */ 576 BUS_SPACE_MAXADDR, /* highaddr */ 577 NULL, NULL, /* filter, filterarg */ 578 sc->max_request_size, 579 sc->max_request_segments, 580 PAGE_SIZE, /* maxsegsize */ 581 BUS_DMA_ALLOCNOW, /* flags */ 582 busdma_lock_mutex, /* lockfunc */ 583 &sc->xb_io_lock, /* lockarg */ 584 &sc->xb_io_dmat); 585 if (error != 0) { 586 xenbus_dev_fatal(sc->xb_dev, error, 587 "Cannot allocate parent DMA tag\n"); 588 return; 589 } 590 591 /* Per-transaction data allocation. 
	sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
	    M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
	if (sc->shadow == NULL) {
		xenbus_dev_fatal(sc->xb_dev, ENOMEM,
		    "Cannot allocate request structures\n");
		return;
	}

	for (i = 0; i < sc->max_requests; i++) {
		struct xb_command *cm;

		cm = &sc->shadow[i];
		cm->sg_refs = malloc(sizeof(grant_ref_t)
		    * sc->max_request_segments,
		    M_XENBLOCKFRONT, M_NOWAIT);
		if (cm->sg_refs == NULL)
			break;
		cm->id = i;
		cm->cm_sc = sc;
		if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
			break;
		xb_free_command(cm);
	}

	if (setup_blkring(sc) != 0)
		return;

	error = xs_printf(XST_NIL, node_path,
	    "ring-pages", "%u", sc->ring_pages);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/ring-pages",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-requests", "%u", sc->max_requests);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-requests",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-request-segments", "%u", sc->max_request_segments);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-request-segments",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "max-request-size", "%u", sc->max_request_size);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/max-request-size",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path, "event-channel",
	    "%u", irq_to_evtchn_port(sc->irq));
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/event-channel",
		    node_path);
		return;
	}

	error = xs_printf(XST_NIL, node_path,
	    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "writing %s/protocol",
		    node_path);
		return;
	}

	xenbus_set_state(sc->xb_dev, XenbusStateInitialised);
}

static int
setup_blkring(struct xb_softc *sc)
{
	blkif_sring_t *sring;
	uintptr_t sring_page_addr;
	int error;
	int i;

	sring = malloc(sc->ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
	    M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
		return (ENOMEM);
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&sc->ring, sring, sc->ring_pages * PAGE_SIZE);

	for (i = 0, sring_page_addr = (uintptr_t)sring;
	     i < sc->ring_pages;
	     i++, sring_page_addr += PAGE_SIZE) {

		error = xenbus_grant_ring(sc->xb_dev,
		    (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->ring_ref[i]);
		if (error) {
			xenbus_dev_fatal(sc->xb_dev, error,
			    "granting ring_ref(%d)", i);
			return (error);
		}
	}
	error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
	    "ring-ref", "%u", sc->ring_ref[0]);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref",
		    xenbus_get_node(sc->xb_dev));
		return (error);
	}
	for (i = 1; i < sc->ring_pages; i++) {
		char ring_ref_name[] = "ring_refXX";

		snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i);
		error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
		    ring_ref_name, "%u", sc->ring_ref[i]);
		if (error) {
			xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
			    xenbus_get_node(sc->xb_dev),
			    ring_ref_name);
			return (error);
		}
	}

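	/*
	 * Create the notification channel for the device: allocate a port
	 * listening for a connection from the back-end domain and dispatch
	 * blkif_int() whenever it is signalled.
	 */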
	error = bind_listening_port_to_irqhandler(
	    xenbus_get_otherend_id(sc->xb_dev),
	    "xbd", (driver_intr_t *)blkif_int, sc,
	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
	if (error) {
		xenbus_dev_fatal(sc->xb_dev, error,
		    "bind_listening_port_to_irqhandler failed");
		return (error);
	}

	return (0);
}

/**
 * Callback received when the backend's state changes.
 */
static int
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
	struct xb_softc *sc = device_get_softc(dev);

	DPRINTK("backend_state=%d\n", backend_state);

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
	case XenbusStateClosed:
		break;

	case XenbusStateInitWait:
		blkfront_initialize(sc);
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		blkfront_initialize(sc);
		blkfront_connect(sc);
		break;

	case XenbusStateClosing:
		if (sc->users > 0)
			xenbus_dev_error(dev, -EBUSY,
			    "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		break;
	}

	return (0);
}

/*
** Invoked when the backend is finally 'ready' (and has published the
** details about the physical device - #sectors, sector size, etc).
*/
static void
blkfront_connect(struct xb_softc *sc)
{
	device_t dev = sc->xb_dev;
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err, feature_barrier;

	if ((sc->connected == BLKIF_STATE_CONNECTED) ||
	    (sc->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));

	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
	    "sectors", "%lu", &sectors,
	    "info", "%u", &binfo,
	    "sector-size", "%lu", &sector_size,
	    NULL);
	if (err) {
		xenbus_dev_fatal(dev, err,
		    "reading backend fields at %s",
		    xenbus_get_otherend_path(dev));
		return;
	}
	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
	    "feature-barrier", "%d", &feature_barrier,
	    NULL);
	if (err == 0 && feature_barrier != 0)
		sc->xb_flags |= XB_BARRIER;

	device_printf(dev, "%juMB <%s> at %s",
	    (uintmax_t) sectors / (1048576 / sector_size),
	    device_get_desc(dev),
	    xenbus_get_node(dev));
	bus_print_child_footer(device_get_parent(dev), dev);

	xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);

	(void)xenbus_set_state(dev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&sc->xb_io_lock);
	sc->connected = BLKIF_STATE_CONNECTED;
	xb_startio(sc);
	sc->xb_flags |= XB_READY;
	mtx_unlock(&sc->xb_io_lock);

}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
837 */ 838 static void 839 blkfront_closing(device_t dev) 840 { 841 struct xb_softc *sc = device_get_softc(dev); 842 843 xenbus_set_state(dev, XenbusStateClosing); 844 845 DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); 846 847 if (sc->xb_disk != NULL) { 848 disk_destroy(sc->xb_disk); 849 sc->xb_disk = NULL; 850 } 851 852 xenbus_set_state(dev, XenbusStateClosed); 853 } 854 855 856 static int 857 blkfront_detach(device_t dev) 858 { 859 struct xb_softc *sc = device_get_softc(dev); 860 861 DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); 862 863 blkif_free(sc, 0); 864 mtx_destroy(&sc->xb_io_lock); 865 866 return 0; 867 } 868 869 870 static inline void 871 flush_requests(struct xb_softc *sc) 872 { 873 int notify; 874 875 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify); 876 877 if (notify) 878 notify_remote_via_irq(sc->irq); 879 } 880 881 static void 882 blkif_restart_queue_callback(void *arg) 883 { 884 struct xb_softc *sc = arg; 885 886 mtx_lock(&sc->xb_io_lock); 887 888 xb_startio(sc); 889 890 mtx_unlock(&sc->xb_io_lock); 891 } 892 893 static int 894 blkif_open(struct disk *dp) 895 { 896 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 897 898 if (sc == NULL) { 899 printf("xb%d: not found", sc->xb_unit); 900 return (ENXIO); 901 } 902 903 sc->xb_flags |= XB_OPEN; 904 sc->users++; 905 return (0); 906 } 907 908 static int 909 blkif_close(struct disk *dp) 910 { 911 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 912 913 if (sc == NULL) 914 return (ENXIO); 915 sc->xb_flags &= ~XB_OPEN; 916 if (--(sc->users) == 0) { 917 /* Check whether we have been instructed to close. We will 918 have ignored this request initially, as the device was 919 still mounted. */ 920 device_t dev = sc->xb_dev; 921 XenbusState state = 922 xenbus_read_driver_state(xenbus_get_otherend_path(dev)); 923 924 if (state == XenbusStateClosing) 925 blkfront_closing(dev); 926 } 927 return (0); 928 } 929 930 static int 931 blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 932 { 933 struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; 934 935 if (sc == NULL) 936 return (ENXIO); 937 938 return (ENOTTY); 939 } 940 941 static void 942 xb_free_command(struct xb_command *cm) 943 { 944 945 KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0, 946 ("Freeing command that is still on a queue\n")); 947 948 cm->cm_flags = 0; 949 cm->bp = NULL; 950 cm->cm_complete = NULL; 951 xb_enqueue_free(cm); 952 } 953 954 /* 955 * blkif_queue_request 956 * 957 * request block io 958 * 959 * id: for guest use only. 960 * operation: BLKIF_OP_{READ,WRITE,PROBE} 961 * buffer: buffer to read/write into. this should be a 962 * virtual address in the guest os. 963 */ 964 static struct xb_command * 965 xb_bio_command(struct xb_softc *sc) 966 { 967 struct xb_command *cm; 968 struct bio *bp; 969 970 if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) 971 return (NULL); 972 973 bp = xb_dequeue_bio(sc); 974 if (bp == NULL) 975 return (NULL); 976 977 if ((cm = xb_dequeue_free(sc)) == NULL) { 978 xb_requeue_bio(sc, bp); 979 return (NULL); 980 } 981 982 if (gnttab_alloc_grant_references(sc->max_request_segments, 983 &cm->gref_head) != 0) { 984 gnttab_request_free_callback(&sc->callback, 985 blkif_restart_queue_callback, sc, 986 sc->max_request_segments); 987 xb_requeue_bio(sc, bp); 988 xb_enqueue_free(cm); 989 sc->xb_flags |= XB_FROZEN; 990 return (NULL); 991 } 992 993 cm->bp = bp; 994 cm->data = bp->bio_data; 995 cm->datalen = bp->bio_bcount; 996 cm->operation = (bp->bio_cmd == BIO_READ) ? 
	cm->sector_number = (blkif_sector_t)bp->bio_pblkno;

	return (cm);
}

static int
blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
{
	int error;

	error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
	    blkif_queue_cb, cm, 0);
	if (error == EINPROGRESS) {
		printf("EINPROGRESS\n");
		sc->xb_flags |= XB_FROZEN;
		cm->cm_flags |= XB_CMD_FROZEN;
		return (0);
	}

	return (error);
}

static void
blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	struct xb_softc *sc;
	struct xb_command *cm;
	blkif_request_t	*ring_req;
	struct blkif_request_segment *sg;
	struct blkif_request_segment *last_block_sg;
	grant_ref_t *sg_ref;
	vm_paddr_t buffer_ma;
	uint64_t fsect, lsect;
	int ref;
	int op;
	int block_segs;

	cm = arg;
	sc = cm->cm_sc;

	//printf("%s: Start\n", __func__);
	if (error) {
		printf("error %d in blkif_queue_cb\n", error);
		cm->bp->bio_error = EIO;
		biodone(cm->bp);
		xb_free_command(cm);
		return;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
	sc->ring.req_prod_pvt++;
	ring_req->id = cm->id;
	ring_req->operation = cm->operation;
	ring_req->sector_number = cm->sector_number;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
	ring_req->nr_segments = nsegs;
	cm->nseg = nsegs;

	block_segs    = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
	sg            = ring_req->seg;
	last_block_sg = sg + block_segs;
	sg_ref        = cm->sg_refs;

	while (1) {

		while (sg < last_block_sg) {
			buffer_ma = segs->ds_addr;
			fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
			lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1;

			KASSERT(lsect <= 7, ("XEN disk driver data cannot "
			    "cross a page boundary"));

			/* install a grant reference. */
			ref = gnttab_claim_grant_reference(&cm->gref_head);

			/*
			 * GNTTAB_LIST_END == 0xffffffff, but it is private
			 * to gnttab.c.
			 */
			KASSERT(ref != ~0, ("grant_reference failed"));

			gnttab_grant_foreign_access_ref(
			    ref,
			    xenbus_get_otherend_id(sc->xb_dev),
			    buffer_ma >> PAGE_SHIFT,
			    ring_req->operation == BLKIF_OP_WRITE);

			*sg_ref = ref;
			*sg = (struct blkif_request_segment) {
				.gref       = ref,
				.first_sect = fsect,
				.last_sect  = lsect };
			sg++;
			sg_ref++;
			segs++;
			nsegs--;
		}
		block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
		if (block_segs == 0)
			break;

		sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
		sc->ring.req_prod_pvt++;
		last_block_sg = sg + block_segs;
	}

	if (cm->operation == BLKIF_OP_READ)
		op = BUS_DMASYNC_PREREAD;
	else if (cm->operation == BLKIF_OP_WRITE)
		op = BUS_DMASYNC_PREWRITE;
	else
		op = 0;
	bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);

	gnttab_free_grant_references(cm->gref_head);

	xb_enqueue_busy(cm);

	/*
	 * This flag means that we're probably executing in the busdma swi
	 * instead of in the startio context, so an explicit flush is needed.
	 */
1121 */ 1122 if (cm->cm_flags & XB_CMD_FROZEN) 1123 flush_requests(sc); 1124 1125 //printf("%s: Done\n", __func__); 1126 return; 1127 } 1128 1129 /* 1130 * Dequeue buffers and place them in the shared communication ring. 1131 * Return when no more requests can be accepted or all buffers have 1132 * been queued. 1133 * 1134 * Signal XEN once the ring has been filled out. 1135 */ 1136 static void 1137 xb_startio(struct xb_softc *sc) 1138 { 1139 struct xb_command *cm; 1140 int error, queued = 0; 1141 1142 mtx_assert(&sc->xb_io_lock, MA_OWNED); 1143 1144 while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) { 1145 if (sc->xb_flags & XB_FROZEN) 1146 break; 1147 1148 cm = xb_dequeue_ready(sc); 1149 1150 if (cm == NULL) 1151 cm = xb_bio_command(sc); 1152 1153 if (cm == NULL) 1154 break; 1155 1156 if ((error = blkif_queue_request(sc, cm)) != 0) { 1157 printf("blkif_queue_request returned %d\n", error); 1158 break; 1159 } 1160 queued++; 1161 } 1162 1163 if (queued != 0) 1164 flush_requests(sc); 1165 } 1166 1167 static void 1168 blkif_int(void *xsc) 1169 { 1170 struct xb_softc *sc = xsc; 1171 struct xb_command *cm; 1172 blkif_response_t *bret; 1173 RING_IDX i, rp; 1174 int op; 1175 1176 mtx_lock(&sc->xb_io_lock); 1177 1178 if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) { 1179 mtx_unlock(&sc->xb_io_lock); 1180 return; 1181 } 1182 1183 again: 1184 rp = sc->ring.sring->rsp_prod; 1185 rmb(); /* Ensure we see queued responses up to 'rp'. */ 1186 1187 for (i = sc->ring.rsp_cons; i != rp;) { 1188 bret = RING_GET_RESPONSE(&sc->ring, i); 1189 cm = &sc->shadow[bret->id]; 1190 1191 xb_remove_busy(cm); 1192 i += blkif_completion(cm); 1193 1194 if (cm->operation == BLKIF_OP_READ) 1195 op = BUS_DMASYNC_POSTREAD; 1196 else if (cm->operation == BLKIF_OP_WRITE) 1197 op = BUS_DMASYNC_POSTWRITE; 1198 else 1199 op = 0; 1200 bus_dmamap_sync(sc->xb_io_dmat, cm->map, op); 1201 bus_dmamap_unload(sc->xb_io_dmat, cm->map); 1202 1203 /* 1204 * If commands are completing then resources are probably 1205 * being freed as well. It's a cheap assumption even when 1206 * wrong. 1207 */ 1208 sc->xb_flags &= ~XB_FROZEN; 1209 1210 /* 1211 * Directly call the i/o complete routine to save an 1212 * an indirection in the common case. 1213 */ 1214 cm->status = bret->status; 1215 if (cm->bp) 1216 xb_bio_complete(sc, cm); 1217 else if (cm->cm_complete) 1218 (cm->cm_complete)(cm); 1219 else 1220 xb_free_command(cm); 1221 } 1222 1223 sc->ring.rsp_cons = i; 1224 1225 if (i != sc->ring.req_prod_pvt) { 1226 int more_to_do; 1227 RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do); 1228 if (more_to_do) 1229 goto again; 1230 } else { 1231 sc->ring.sring->rsp_event = i + 1; 1232 } 1233 1234 xb_startio(sc); 1235 1236 mtx_unlock(&sc->xb_io_lock); 1237 } 1238 1239 static void 1240 blkif_free(struct xb_softc *sc, int suspend) 1241 { 1242 uint8_t *sring_page_ptr; 1243 int i; 1244 1245 /* Prevent new requests being issued until we fix things up. */ 1246 mtx_lock(&sc->xb_io_lock); 1247 sc->connected = suspend ? 1248 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 1249 mtx_unlock(&sc->xb_io_lock); 1250 1251 /* Free resources associated with old device channel. 
	if (sc->ring.sring != NULL) {
		sring_page_ptr = (uint8_t *)sc->ring.sring;
		for (i = 0; i < sc->ring_pages; i++) {
			if (sc->ring_ref[i] != GRANT_INVALID_REF) {
				gnttab_end_foreign_access_ref(sc->ring_ref[i]);
				sc->ring_ref[i] = GRANT_INVALID_REF;
			}
			sring_page_ptr += PAGE_SIZE;
		}
		free(sc->ring.sring, M_XENBLOCKFRONT);
		sc->ring.sring = NULL;
	}

	if (sc->shadow) {

		for (i = 0; i < sc->max_requests; i++) {
			struct xb_command *cm;

			cm = &sc->shadow[i];
			if (cm->sg_refs != NULL) {
				free(cm->sg_refs, M_XENBLOCKFRONT);
				cm->sg_refs = NULL;
			}

			bus_dmamap_destroy(sc->xb_io_dmat, cm->map);
		}
		free(sc->shadow, M_XENBLOCKFRONT);
		sc->shadow = NULL;
	}

	if (sc->irq) {
		unbind_from_irqhandler(sc->irq);
		sc->irq = 0;
	}
}

static int
blkif_completion(struct xb_command *s)
{
	//printf("%s: Req %p(%d)\n", __func__, s, s->nseg);
	gnttab_end_foreign_access_references(s->nseg, s->sg_refs);
	return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
}

#if 0
static void
blkif_recover(struct xb_softc *sc)
{
	/*
	 * XXX The whole concept of not quiescing and completing all i/o
	 * during suspend, and then hoping to recover and replay the
	 * resulting abandoned I/O during resume, is laughable.  At best,
	 * it invalidates the i/o ordering rules required by just about
	 * every filesystem, and at worst it'll corrupt data.  The code
	 * has been removed until further notice.
	 */
}
#endif

/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         blkfront_probe),
	DEVMETHOD(device_attach,        blkfront_attach),
	DEVMETHOD(device_detach,        blkfront_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       blkfront_suspend),
	DEVMETHOD(device_resume,        blkfront_resume),

	/* Xenbus interface */
	DEVMETHOD(xenbus_otherend_changed, blkfront_backend_changed),

	{ 0, 0 }
};

static driver_t blkfront_driver = {
	"xbd",
	blkfront_methods,
	sizeof(struct xb_softc),
};
devclass_t blkfront_devclass;

DRIVER_MODULE(xbd, xenbusb_front, blkfront_driver, blkfront_devclass, 0, 0);