1 /*- 2 * Copyright (c) 2009-2012 Spectra Logic Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions, and the following disclaimer, 10 * without modification. 11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12 * substantially similar to the "NO WARRANTY" disclaimer below 13 * ("Disclaimer") and any redistribution must be conditioned upon 14 * including a substantially similar Disclaimer requirement for further 15 * binary redistribution. 16 * 17 * NO WARRANTY 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGES. 29 * 30 * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31 * Ken Merry (Spectra Logic Corporation) 32 */ 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 /** 37 * \file blkback.c 38 * 39 * \brief Device driver supporting the vending of block storage from 40 * a FreeBSD domain to other domains. 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/malloc.h> 47 48 #include <sys/bio.h> 49 #include <sys/bus.h> 50 #include <sys/conf.h> 51 #include <sys/devicestat.h> 52 #include <sys/disk.h> 53 #include <sys/fcntl.h> 54 #include <sys/filedesc.h> 55 #include <sys/kdb.h> 56 #include <sys/module.h> 57 #include <sys/namei.h> 58 #include <sys/proc.h> 59 #include <sys/rman.h> 60 #include <sys/taskqueue.h> 61 #include <sys/types.h> 62 #include <sys/vnode.h> 63 #include <sys/mount.h> 64 #include <sys/sysctl.h> 65 #include <sys/bitstring.h> 66 #include <sys/sdt.h> 67 68 #include <geom/geom.h> 69 70 #include <machine/_inttypes.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_extern.h> 74 #include <vm/vm_kern.h> 75 76 #include <xen/xen-os.h> 77 #include <xen/blkif.h> 78 #include <xen/gnttab.h> 79 #include <xen/xen_intr.h> 80 81 #include <xen/interface/event_channel.h> 82 #include <xen/interface/grant_table.h> 83 84 #include <xen/xenbus/xenbusvar.h> 85 86 /*--------------------------- Compile-time Tunables --------------------------*/ 87 /** 88 * The maximum number of outstanding request blocks (request headers plus 89 * additional segment blocks) we will allow in a negotiated block-front/back 90 * communication channel. 91 */ 92 #define XBB_MAX_REQUESTS 256 93 94 /** 95 * \brief Define to force all I/O to be performed on memory owned by the 96 * backend device, with a copy-in/out to the remote domain's memory. 97 * 98 * \note This option is currently required when this driver's domain is 99 * operating in HVM mode on a system using an IOMMU. 
100 * 101 * This driver uses Xen's grant table API to gain access to the memory of 102 * the remote domains it serves. When our domain is operating in PV mode, 103 * the grant table mechanism directly updates our domain's page table entries 104 * to point to the physical pages of the remote domain. This scheme guarantees 105 * that blkback and the backing devices it uses can safely perform DMA 106 * operations to satisfy requests. In HVM mode, Xen may use a HW IOMMU to 107 * insure that our domain cannot DMA to pages owned by another domain. As 108 * of Xen 4.0, IOMMU mappings for HVM guests are not updated via the grant 109 * table API. For this reason, in HVM mode, we must bounce all requests into 110 * memory that is mapped into our domain at domain startup and thus has 111 * valid IOMMU mappings. 112 */ 113 #define XBB_USE_BOUNCE_BUFFERS 114 115 /** 116 * \brief Define to enable rudimentary request logging to the console. 117 */ 118 #undef XBB_DEBUG 119 120 /*---------------------------------- Macros ----------------------------------*/ 121 /** 122 * Custom malloc type for all driver allocations. 123 */ 124 static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data"); 125 126 #ifdef XBB_DEBUG 127 #define DPRINTF(fmt, args...) \ 128 printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 129 #else 130 #define DPRINTF(fmt, args...) do {} while(0) 131 #endif 132 133 /** 134 * The maximum mapped region size per request we will allow in a negotiated 135 * block-front/back communication channel. 136 */ 137 #define XBB_MAX_REQUEST_SIZE \ 138 MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) 139 140 /** 141 * The maximum number of segments (within a request header and accompanying 142 * segment blocks) per request we will allow in a negotiated block-front/back 143 * communication channel. 144 */ 145 #define XBB_MAX_SEGMENTS_PER_REQUEST \ 146 (MIN(UIO_MAXIOV, \ 147 MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ 148 (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))) 149 150 /** 151 * The maximum number of shared memory ring pages we will allow in a 152 * negotiated block-front/back communication channel. Allow enough 153 * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd. 154 */ 155 #define XBB_MAX_RING_PAGES \ 156 BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \ 157 * XBB_MAX_REQUESTS) 158 /** 159 * The maximum number of ring pages that we can allow per request list. 160 * We limit this to the maximum number of segments per request, because 161 * that is already a reasonable number of segments to aggregate. This 162 * number should never be smaller than XBB_MAX_SEGMENTS_PER_REQUEST, 163 * because that would leave situations where we can't dispatch even one 164 * large request. 165 */ 166 #define XBB_MAX_SEGMENTS_PER_REQLIST XBB_MAX_SEGMENTS_PER_REQUEST 167 168 /*--------------------------- Forward Declarations ---------------------------*/ 169 struct xbb_softc; 170 struct xbb_xen_req; 171 172 static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, 173 ...) 
__attribute__((format(printf, 3, 4))); 174 static int xbb_shutdown(struct xbb_softc *xbb); 175 static int xbb_detach(device_t dev); 176 177 /*------------------------------ Data Structures -----------------------------*/ 178 179 STAILQ_HEAD(xbb_xen_req_list, xbb_xen_req); 180 181 typedef enum { 182 XBB_REQLIST_NONE = 0x00, 183 XBB_REQLIST_MAPPED = 0x01 184 } xbb_reqlist_flags; 185 186 struct xbb_xen_reqlist { 187 /** 188 * Back reference to the parent block back instance for this 189 * request. Used during bio_done handling. 190 */ 191 struct xbb_softc *xbb; 192 193 /** 194 * BLKIF_OP code for this request. 195 */ 196 int operation; 197 198 /** 199 * Set to BLKIF_RSP_* to indicate request status. 200 * 201 * This field allows an error status to be recorded even if the 202 * delivery of this status must be deferred. Deferred reporting 203 * is necessary, for example, when an error is detected during 204 * completion processing of one bio when other bios for this 205 * request are still outstanding. 206 */ 207 int status; 208 209 /** 210 * Number of 512 byte sectors not transferred. 211 */ 212 int residual_512b_sectors; 213 214 /** 215 * Starting sector number of the first request in the list. 216 */ 217 off_t starting_sector_number; 218 219 /** 220 * If we're going to coalesce, the next contiguous sector would be 221 * this one. 222 */ 223 off_t next_contig_sector; 224 225 /** 226 * Number of child requests in the list. 227 */ 228 int num_children; 229 230 /** 231 * Number of I/O requests still pending on the backend. 232 */ 233 int pendcnt; 234 235 /** 236 * Total number of segments for requests in the list. 237 */ 238 int nr_segments; 239 240 /** 241 * Flags for this particular request list. 242 */ 243 xbb_reqlist_flags flags; 244 245 /** 246 * Kernel virtual address space reserved for this request 247 * list structure and used to map the remote domain's pages for 248 * this I/O, into our domain's address space. 249 */ 250 uint8_t *kva; 251 252 /** 253 * Base, psuedo-physical address, corresponding to the start 254 * of this request's kva region. 255 */ 256 uint64_t gnt_base; 257 258 259 #ifdef XBB_USE_BOUNCE_BUFFERS 260 /** 261 * Pre-allocated domain local memory used to proxy remote 262 * domain memory during I/O operations. 263 */ 264 uint8_t *bounce; 265 #endif 266 267 /** 268 * Array of grant handles (one per page) used to map this request. 269 */ 270 grant_handle_t *gnt_handles; 271 272 /** 273 * Device statistics request ordering type (ordered or simple). 274 */ 275 devstat_tag_type ds_tag_type; 276 277 /** 278 * Device statistics request type (read, write, no_data). 279 */ 280 devstat_trans_flags ds_trans_type; 281 282 /** 283 * The start time for this request. 284 */ 285 struct bintime ds_t0; 286 287 /** 288 * Linked list of contiguous requests with the same operation type. 289 */ 290 struct xbb_xen_req_list contig_req_list; 291 292 /** 293 * Linked list links used to aggregate idle requests in the 294 * request list free pool (xbb->reqlist_free_stailq) and pending 295 * requests waiting for execution (xbb->reqlist_pending_stailq). 296 */ 297 STAILQ_ENTRY(xbb_xen_reqlist) links; 298 }; 299 300 STAILQ_HEAD(xbb_xen_reqlist_list, xbb_xen_reqlist); 301 302 /** 303 * \brief Object tracking an in-flight I/O from a Xen VBD consumer. 304 */ 305 struct xbb_xen_req { 306 /** 307 * Linked list links used to aggregate requests into a reqlist 308 * and to store them in the request free pool. 
309 */ 310 STAILQ_ENTRY(xbb_xen_req) links; 311 312 /** 313 * The remote domain's identifier for this I/O request. 314 */ 315 uint64_t id; 316 317 /** 318 * The number of pages currently mapped for this request. 319 */ 320 int nr_pages; 321 322 /** 323 * The number of 512 byte sectors comprising this requests. 324 */ 325 int nr_512b_sectors; 326 327 /** 328 * BLKIF_OP code for this request. 329 */ 330 int operation; 331 332 /** 333 * Storage used for non-native ring requests. 334 */ 335 blkif_request_t ring_req_storage; 336 337 /** 338 * Pointer to the Xen request in the ring. 339 */ 340 blkif_request_t *ring_req; 341 342 /** 343 * Consumer index for this request. 344 */ 345 RING_IDX req_ring_idx; 346 347 /** 348 * The start time for this request. 349 */ 350 struct bintime ds_t0; 351 352 /** 353 * Pointer back to our parent request list. 354 */ 355 struct xbb_xen_reqlist *reqlist; 356 }; 357 SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req); 358 359 /** 360 * \brief Configuration data for the shared memory request ring 361 * used to communicate with the front-end client of this 362 * this driver. 363 */ 364 struct xbb_ring_config { 365 /** KVA address where ring memory is mapped. */ 366 vm_offset_t va; 367 368 /** The pseudo-physical address where ring memory is mapped.*/ 369 uint64_t gnt_addr; 370 371 /** 372 * Grant table handles, one per-ring page, returned by the 373 * hyperpervisor upon mapping of the ring and required to 374 * unmap it when a connection is torn down. 375 */ 376 grant_handle_t handle[XBB_MAX_RING_PAGES]; 377 378 /** 379 * The device bus address returned by the hypervisor when 380 * mapping the ring and required to unmap it when a connection 381 * is torn down. 382 */ 383 uint64_t bus_addr[XBB_MAX_RING_PAGES]; 384 385 /** The number of ring pages mapped for the current connection. */ 386 u_int ring_pages; 387 388 /** 389 * The grant references, one per-ring page, supplied by the 390 * front-end, allowing us to reference the ring pages in the 391 * front-end's domain and to map these pages into our own domain. 392 */ 393 grant_ref_t ring_ref[XBB_MAX_RING_PAGES]; 394 395 /** The interrupt driven even channel used to signal ring events. */ 396 evtchn_port_t evtchn; 397 }; 398 399 /** 400 * Per-instance connection state flags. 401 */ 402 typedef enum 403 { 404 /** 405 * The front-end requested a read-only mount of the 406 * back-end device/file. 407 */ 408 XBBF_READ_ONLY = 0x01, 409 410 /** Communication with the front-end has been established. */ 411 XBBF_RING_CONNECTED = 0x02, 412 413 /** 414 * Front-end requests exist in the ring and are waiting for 415 * xbb_xen_req objects to free up. 416 */ 417 XBBF_RESOURCE_SHORTAGE = 0x04, 418 419 /** Connection teardown in progress. */ 420 XBBF_SHUTDOWN = 0x08, 421 422 /** A thread is already performing shutdown processing. */ 423 XBBF_IN_SHUTDOWN = 0x10 424 } xbb_flag_t; 425 426 /** Backend device type. */ 427 typedef enum { 428 /** Backend type unknown. */ 429 XBB_TYPE_NONE = 0x00, 430 431 /** 432 * Backend type disk (access via cdev switch 433 * strategy routine). 434 */ 435 XBB_TYPE_DISK = 0x01, 436 437 /** Backend type file (access vnode operations.). */ 438 XBB_TYPE_FILE = 0x02 439 } xbb_type; 440 441 /** 442 * \brief Structure used to memoize information about a per-request 443 * scatter-gather list. 444 * 445 * The chief benefit of using this data structure is it avoids having 446 * to reparse the possibly discontiguous S/G list in the original 447 * request. 
Due to the way that the mapping of the memory backing an 448 * I/O transaction is handled by Xen, a second pass is unavoidable. 449 * At least this way the second walk is a simple array traversal. 450 * 451 * \note A single Scatter/Gather element in the block interface covers 452 * at most 1 machine page. In this context a sector (blkif 453 * nomenclature, not what I'd choose) is a 512b aligned unit 454 * of mapping within the machine page referenced by an S/G 455 * element. 456 */ 457 struct xbb_sg { 458 /** The number of 512b data chunks mapped in this S/G element. */ 459 int16_t nsect; 460 461 /** 462 * The index (0 based) of the first 512b data chunk mapped 463 * in this S/G element. 464 */ 465 uint8_t first_sect; 466 467 /** 468 * The index (0 based) of the last 512b data chunk mapped 469 * in this S/G element. 470 */ 471 uint8_t last_sect; 472 }; 473 474 /** 475 * Character device backend specific configuration data. 476 */ 477 struct xbb_dev_data { 478 /** Cdev used for device backend access. */ 479 struct cdev *cdev; 480 481 /** Cdev switch used for device backend access. */ 482 struct cdevsw *csw; 483 484 /** Used to hold a reference on opened cdev backend devices. */ 485 int dev_ref; 486 }; 487 488 /** 489 * File backend specific configuration data. 490 */ 491 struct xbb_file_data { 492 /** Credentials to use for vnode backed (file based) I/O. */ 493 struct ucred *cred; 494 495 /** 496 * \brief Array of io vectors used to process file based I/O. 497 * 498 * Only a single file based request is outstanding per-xbb instance, 499 * so we only need one of these. 500 */ 501 struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; 502 #ifdef XBB_USE_BOUNCE_BUFFERS 503 504 /** 505 * \brief Array of io vectors used to handle bouncing of file reads. 506 * 507 * Vnode operations are free to modify uio data during their 508 * exectuion. In the case of a read with bounce buffering active, 509 * we need some of the data from the original uio in order to 510 * bounce-out the read data. This array serves as the temporary 511 * storage for this saved data. 512 */ 513 struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; 514 515 /** 516 * \brief Array of memoized bounce buffer kva offsets used 517 * in the file based backend. 518 * 519 * Due to the way that the mapping of the memory backing an 520 * I/O transaction is handled by Xen, a second pass through 521 * the request sg elements is unavoidable. We memoize the computed 522 * bounce address here to reduce the cost of the second walk. 523 */ 524 void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST]; 525 #endif /* XBB_USE_BOUNCE_BUFFERS */ 526 }; 527 528 /** 529 * Collection of backend type specific data. 530 */ 531 union xbb_backend_data { 532 struct xbb_dev_data dev; 533 struct xbb_file_data file; 534 }; 535 536 /** 537 * Function signature of backend specific I/O handlers. 538 */ 539 typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, 540 struct xbb_xen_reqlist *reqlist, int operation, 541 int flags); 542 543 /** 544 * Per-instance configuration data. 545 */ 546 struct xbb_softc { 547 548 /** 549 * Task-queue used to process I/O requests. 550 */ 551 struct taskqueue *io_taskqueue; 552 553 /** 554 * Single "run the request queue" task enqueued 555 * on io_taskqueue. 556 */ 557 struct task io_task; 558 559 /** Device type for this instance. */ 560 xbb_type device_type; 561 562 /** NewBus device corresponding to this instance. */ 563 device_t dev; 564 565 /** Backend specific dispatch routine for this instance. 
*/ 566 xbb_dispatch_t dispatch_io; 567 568 /** The number of requests outstanding on the backend device/file. */ 569 int active_request_count; 570 571 /** Free pool of request tracking structures. */ 572 struct xbb_xen_req_list request_free_stailq; 573 574 /** Array, sized at connection time, of request tracking structures. */ 575 struct xbb_xen_req *requests; 576 577 /** Free pool of request list structures. */ 578 struct xbb_xen_reqlist_list reqlist_free_stailq; 579 580 /** List of pending request lists awaiting execution. */ 581 struct xbb_xen_reqlist_list reqlist_pending_stailq; 582 583 /** Array, sized at connection time, of request list structures. */ 584 struct xbb_xen_reqlist *request_lists; 585 586 /** 587 * Global pool of kva used for mapping remote domain ring 588 * and I/O transaction data. 589 */ 590 vm_offset_t kva; 591 592 /** Psuedo-physical address corresponding to kva. */ 593 uint64_t gnt_base_addr; 594 595 /** The size of the global kva pool. */ 596 int kva_size; 597 598 /** The size of the KVA area used for request lists. */ 599 int reqlist_kva_size; 600 601 /** The number of pages of KVA used for request lists */ 602 int reqlist_kva_pages; 603 604 /** Bitmap of free KVA pages */ 605 bitstr_t *kva_free; 606 607 /** 608 * \brief Cached value of the front-end's domain id. 609 * 610 * This value is used at once for each mapped page in 611 * a transaction. We cache it to avoid incuring the 612 * cost of an ivar access every time this is needed. 613 */ 614 domid_t otherend_id; 615 616 /** 617 * \brief The blkif protocol abi in effect. 618 * 619 * There are situations where the back and front ends can 620 * have a different, native abi (e.g. intel x86_64 and 621 * 32bit x86 domains on the same machine). The back-end 622 * always accomodates the front-end's native abi. That 623 * value is pulled from the XenStore and recorded here. 624 */ 625 int abi; 626 627 /** 628 * \brief The maximum number of requests and request lists allowed 629 * to be in flight at a time. 630 * 631 * This value is negotiated via the XenStore. 632 */ 633 u_int max_requests; 634 635 /** 636 * \brief The maximum number of segments (1 page per segment) 637 * that can be mapped by a request. 638 * 639 * This value is negotiated via the XenStore. 640 */ 641 u_int max_request_segments; 642 643 /** 644 * \brief Maximum number of segments per request list. 645 * 646 * This value is derived from and will generally be larger than 647 * max_request_segments. 648 */ 649 u_int max_reqlist_segments; 650 651 /** 652 * The maximum size of any request to this back-end 653 * device. 654 * 655 * This value is negotiated via the XenStore. 656 */ 657 u_int max_request_size; 658 659 /** 660 * The maximum size of any request list. This is derived directly 661 * from max_reqlist_segments. 662 */ 663 u_int max_reqlist_size; 664 665 /** Various configuration and state bit flags. */ 666 xbb_flag_t flags; 667 668 /** Ring mapping and interrupt configuration data. */ 669 struct xbb_ring_config ring_config; 670 671 /** Runtime, cross-abi safe, structures for ring access. */ 672 blkif_back_rings_t rings; 673 674 /** IRQ mapping for the communication ring event channel. */ 675 xen_intr_handle_t xen_intr_handle; 676 677 /** 678 * \brief Backend access mode flags (e.g. write, or read-only). 679 * 680 * This value is passed to us by the front-end via the XenStore. 681 */ 682 char *dev_mode; 683 684 /** 685 * \brief Backend device type (e.g. "disk", "cdrom", "floppy"). 
686 * 687 * This value is passed to us by the front-end via the XenStore. 688 * Currently unused. 689 */ 690 char *dev_type; 691 692 /** 693 * \brief Backend device/file identifier. 694 * 695 * This value is passed to us by the front-end via the XenStore. 696 * We expect this to be a POSIX path indicating the file or 697 * device to open. 698 */ 699 char *dev_name; 700 701 /** 702 * Vnode corresponding to the backend device node or file 703 * we are acessing. 704 */ 705 struct vnode *vn; 706 707 union xbb_backend_data backend; 708 709 /** The native sector size of the backend. */ 710 u_int sector_size; 711 712 /** log2 of sector_size. */ 713 u_int sector_size_shift; 714 715 /** Size in bytes of the backend device or file. */ 716 off_t media_size; 717 718 /** 719 * \brief media_size expressed in terms of the backend native 720 * sector size. 721 * 722 * (e.g. xbb->media_size >> xbb->sector_size_shift). 723 */ 724 uint64_t media_num_sectors; 725 726 /** 727 * \brief Array of memoized scatter gather data computed during the 728 * conversion of blkif ring requests to internal xbb_xen_req 729 * structures. 730 * 731 * Ring processing is serialized so we only need one of these. 732 */ 733 struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST]; 734 735 /** 736 * Temporary grant table map used in xbb_dispatch_io(). When 737 * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the 738 * stack could cause a stack overflow. 739 */ 740 struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST]; 741 742 /** Mutex protecting per-instance data. */ 743 struct mtx lock; 744 745 #ifdef XENHVM 746 /** 747 * Resource representing allocated physical address space 748 * associated with our per-instance kva region. 749 */ 750 struct resource *pseudo_phys_res; 751 752 /** Resource id for allocated physical address space. */ 753 int pseudo_phys_res_id; 754 #endif 755 756 /** 757 * I/O statistics from BlockBack dispatch down. These are 758 * coalesced requests, and we start them right before execution. 759 */ 760 struct devstat *xbb_stats; 761 762 /** 763 * I/O statistics coming into BlockBack. These are the requests as 764 * we get them from BlockFront. They are started as soon as we 765 * receive a request, and completed when the I/O is complete. 766 */ 767 struct devstat *xbb_stats_in; 768 769 /** Disable sending flush to the backend */ 770 int disable_flush; 771 772 /** Send a real flush for every N flush requests */ 773 int flush_interval; 774 775 /** Count of flush requests in the interval */ 776 int flush_count; 777 778 /** Don't coalesce requests if this is set */ 779 int no_coalesce_reqs; 780 781 /** Number of requests we have received */ 782 uint64_t reqs_received; 783 784 /** Number of requests we have completed*/ 785 uint64_t reqs_completed; 786 787 /** Number of requests we queued but not pushed*/ 788 uint64_t reqs_queued_for_completion; 789 790 /** Number of requests we completed with an error status*/ 791 uint64_t reqs_completed_with_error; 792 793 /** How many forced dispatches (i.e. 
without coalescing) have happend */ 794 uint64_t forced_dispatch; 795 796 /** How many normal dispatches have happend */ 797 uint64_t normal_dispatch; 798 799 /** How many total dispatches have happend */ 800 uint64_t total_dispatch; 801 802 /** How many times we have run out of KVA */ 803 uint64_t kva_shortages; 804 805 /** How many times we have run out of request structures */ 806 uint64_t request_shortages; 807 }; 808 809 /*---------------------------- Request Processing ----------------------------*/ 810 /** 811 * Allocate an internal transaction tracking structure from the free pool. 812 * 813 * \param xbb Per-instance xbb configuration structure. 814 * 815 * \return On success, a pointer to the allocated xbb_xen_req structure. 816 * Otherwise NULL. 817 */ 818 static inline struct xbb_xen_req * 819 xbb_get_req(struct xbb_softc *xbb) 820 { 821 struct xbb_xen_req *req; 822 823 req = NULL; 824 825 mtx_assert(&xbb->lock, MA_OWNED); 826 827 if ((req = STAILQ_FIRST(&xbb->request_free_stailq)) != NULL) { 828 STAILQ_REMOVE_HEAD(&xbb->request_free_stailq, links); 829 xbb->active_request_count++; 830 } 831 832 return (req); 833 } 834 835 /** 836 * Return an allocated transaction tracking structure to the free pool. 837 * 838 * \param xbb Per-instance xbb configuration structure. 839 * \param req The request structure to free. 840 */ 841 static inline void 842 xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) 843 { 844 mtx_assert(&xbb->lock, MA_OWNED); 845 846 STAILQ_INSERT_HEAD(&xbb->request_free_stailq, req, links); 847 xbb->active_request_count--; 848 849 KASSERT(xbb->active_request_count >= 0, 850 ("xbb_release_req: negative active count")); 851 } 852 853 /** 854 * Return an xbb_xen_req_list of allocated xbb_xen_reqs to the free pool. 855 * 856 * \param xbb Per-instance xbb configuration structure. 857 * \param req_list The list of requests to free. 858 * \param nreqs The number of items in the list. 859 */ 860 static inline void 861 xbb_release_reqs(struct xbb_softc *xbb, struct xbb_xen_req_list *req_list, 862 int nreqs) 863 { 864 mtx_assert(&xbb->lock, MA_OWNED); 865 866 STAILQ_CONCAT(&xbb->request_free_stailq, req_list); 867 xbb->active_request_count -= nreqs; 868 869 KASSERT(xbb->active_request_count >= 0, 870 ("xbb_release_reqs: negative active count")); 871 } 872 873 /** 874 * Given a page index and 512b sector offset within that page, 875 * calculate an offset into a request's kva region. 876 * 877 * \param reqlist The request structure whose kva region will be accessed. 878 * \param pagenr The page index used to compute the kva offset. 879 * \param sector The 512b sector index used to compute the page relative 880 * kva offset. 881 * 882 * \return The computed global KVA offset. 883 */ 884 static inline uint8_t * 885 xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) 886 { 887 return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); 888 } 889 890 #ifdef XBB_USE_BOUNCE_BUFFERS 891 /** 892 * Given a page index and 512b sector offset within that page, 893 * calculate an offset into a request's local bounce memory region. 894 * 895 * \param reqlist The request structure whose bounce region will be accessed. 896 * \param pagenr The page index used to compute the bounce offset. 897 * \param sector The 512b sector index used to compute the page relative 898 * bounce offset. 899 * 900 * \return The computed global bounce buffer address. 
901 */ 902 static inline uint8_t * 903 xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) 904 { 905 return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); 906 } 907 #endif 908 909 /** 910 * Given a page number and 512b sector offset within that page, 911 * calculate an offset into the request's memory region that the 912 * underlying backend device/file should use for I/O. 913 * 914 * \param reqlist The request structure whose I/O region will be accessed. 915 * \param pagenr The page index used to compute the I/O offset. 916 * \param sector The 512b sector index used to compute the page relative 917 * I/O offset. 918 * 919 * \return The computed global I/O address. 920 * 921 * Depending on configuration, this will either be a local bounce buffer 922 * or a pointer to the memory mapped in from the front-end domain for 923 * this request. 924 */ 925 static inline uint8_t * 926 xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) 927 { 928 #ifdef XBB_USE_BOUNCE_BUFFERS 929 return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector)); 930 #else 931 return (xbb_reqlist_vaddr(reqlist, pagenr, sector)); 932 #endif 933 } 934 935 /** 936 * Given a page index and 512b sector offset within that page, calculate 937 * an offset into the local psuedo-physical address space used to map a 938 * front-end's request data into a request. 939 * 940 * \param reqlist The request list structure whose pseudo-physical region 941 * will be accessed. 942 * \param pagenr The page index used to compute the pseudo-physical offset. 943 * \param sector The 512b sector index used to compute the page relative 944 * pseudo-physical offset. 945 * 946 * \return The computed global pseudo-phsyical address. 947 * 948 * Depending on configuration, this will either be a local bounce buffer 949 * or a pointer to the memory mapped in from the front-end domain for 950 * this request. 951 */ 952 static inline uintptr_t 953 xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) 954 { 955 struct xbb_softc *xbb; 956 957 xbb = reqlist->xbb; 958 959 return ((uintptr_t)(xbb->gnt_base_addr + 960 (uintptr_t)(reqlist->kva - xbb->kva) + 961 (PAGE_SIZE * pagenr) + (sector << 9))); 962 } 963 964 /** 965 * Get Kernel Virtual Address space for mapping requests. 966 * 967 * \param xbb Per-instance xbb configuration structure. 968 * \param nr_pages Number of pages needed. 969 * \param check_only If set, check for free KVA but don't allocate it. 970 * \param have_lock If set, xbb lock is already held. 971 * 972 * \return On success, a pointer to the allocated KVA region. Otherwise NULL. 973 * 974 * Note: This should be unnecessary once we have either chaining or 975 * scatter/gather support for struct bio. At that point we'll be able to 976 * put multiple addresses and lengths in one bio/bio chain and won't need 977 * to map everything into one virtual segment. 978 */ 979 static uint8_t * 980 xbb_get_kva(struct xbb_softc *xbb, int nr_pages) 981 { 982 intptr_t first_clear; 983 intptr_t num_clear; 984 uint8_t *free_kva; 985 int i; 986 987 KASSERT(nr_pages != 0, ("xbb_get_kva of zero length")); 988 989 first_clear = 0; 990 free_kva = NULL; 991 992 mtx_lock(&xbb->lock); 993 994 /* 995 * Look for the first available page. If there are none, we're done. 
996 */ 997 bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear); 998 999 if (first_clear == -1) 1000 goto bailout; 1001 1002 /* 1003 * Starting at the first available page, look for consecutive free 1004 * pages that will satisfy the user's request. 1005 */ 1006 for (i = first_clear, num_clear = 0; i < xbb->reqlist_kva_pages; i++) { 1007 /* 1008 * If this is true, the page is used, so we have to reset 1009 * the number of clear pages and the first clear page 1010 * (since it pointed to a region with an insufficient number 1011 * of clear pages). 1012 */ 1013 if (bit_test(xbb->kva_free, i)) { 1014 num_clear = 0; 1015 first_clear = -1; 1016 continue; 1017 } 1018 1019 if (first_clear == -1) 1020 first_clear = i; 1021 1022 /* 1023 * If this is true, we've found a large enough free region 1024 * to satisfy the request. 1025 */ 1026 if (++num_clear == nr_pages) { 1027 1028 bit_nset(xbb->kva_free, first_clear, 1029 first_clear + nr_pages - 1); 1030 1031 free_kva = xbb->kva + 1032 (uint8_t *)(first_clear * PAGE_SIZE); 1033 1034 KASSERT(free_kva >= (uint8_t *)xbb->kva && 1035 free_kva + (nr_pages * PAGE_SIZE) <= 1036 (uint8_t *)xbb->ring_config.va, 1037 ("Free KVA %p len %d out of range, " 1038 "kva = %#jx, ring VA = %#jx\n", free_kva, 1039 nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva, 1040 (uintmax_t)xbb->ring_config.va)); 1041 break; 1042 } 1043 } 1044 1045 bailout: 1046 1047 if (free_kva == NULL) { 1048 xbb->flags |= XBBF_RESOURCE_SHORTAGE; 1049 xbb->kva_shortages++; 1050 } 1051 1052 mtx_unlock(&xbb->lock); 1053 1054 return (free_kva); 1055 } 1056 1057 /** 1058 * Free allocated KVA. 1059 * 1060 * \param xbb Per-instance xbb configuration structure. 1061 * \param kva_ptr Pointer to allocated KVA region. 1062 * \param nr_pages Number of pages in the KVA region. 1063 */ 1064 static void 1065 xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages) 1066 { 1067 intptr_t start_page; 1068 1069 mtx_assert(&xbb->lock, MA_OWNED); 1070 1071 start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT; 1072 bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1); 1073 1074 } 1075 1076 /** 1077 * Unmap the front-end pages associated with this I/O request. 1078 * 1079 * \param req The request structure to unmap. 1080 */ 1081 static void 1082 xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist) 1083 { 1084 struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; 1085 u_int i; 1086 u_int invcount; 1087 int error; 1088 1089 invcount = 0; 1090 for (i = 0; i < reqlist->nr_segments; i++) { 1091 1092 if (reqlist->gnt_handles[i] == GRANT_REF_INVALID) 1093 continue; 1094 1095 unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0); 1096 unmap[invcount].dev_bus_addr = 0; 1097 unmap[invcount].handle = reqlist->gnt_handles[i]; 1098 reqlist->gnt_handles[i] = GRANT_REF_INVALID; 1099 invcount++; 1100 } 1101 1102 error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1103 unmap, invcount); 1104 KASSERT(error == 0, ("Grant table operation failed")); 1105 } 1106 1107 /** 1108 * Allocate an internal transaction tracking structure from the free pool. 1109 * 1110 * \param xbb Per-instance xbb configuration structure. 1111 * 1112 * \return On success, a pointer to the allocated xbb_xen_reqlist structure. 1113 * Otherwise NULL. 
1114 */ 1115 static inline struct xbb_xen_reqlist * 1116 xbb_get_reqlist(struct xbb_softc *xbb) 1117 { 1118 struct xbb_xen_reqlist *reqlist; 1119 1120 reqlist = NULL; 1121 1122 mtx_assert(&xbb->lock, MA_OWNED); 1123 1124 if ((reqlist = STAILQ_FIRST(&xbb->reqlist_free_stailq)) != NULL) { 1125 1126 STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links); 1127 reqlist->flags = XBB_REQLIST_NONE; 1128 reqlist->kva = NULL; 1129 reqlist->status = BLKIF_RSP_OKAY; 1130 reqlist->residual_512b_sectors = 0; 1131 reqlist->num_children = 0; 1132 reqlist->nr_segments = 0; 1133 STAILQ_INIT(&reqlist->contig_req_list); 1134 } 1135 1136 return (reqlist); 1137 } 1138 1139 /** 1140 * Return an allocated transaction tracking structure to the free pool. 1141 * 1142 * \param xbb Per-instance xbb configuration structure. 1143 * \param req The request list structure to free. 1144 * \param wakeup If set, wakeup the work thread if freeing this reqlist 1145 * during a resource shortage condition. 1146 */ 1147 static inline void 1148 xbb_release_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, 1149 int wakeup) 1150 { 1151 1152 mtx_assert(&xbb->lock, MA_OWNED); 1153 1154 if (wakeup) { 1155 wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE; 1156 xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; 1157 } 1158 1159 if (reqlist->kva != NULL) 1160 xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments); 1161 1162 xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children); 1163 1164 STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); 1165 1166 if ((xbb->flags & XBBF_SHUTDOWN) != 0) { 1167 /* 1168 * Shutdown is in progress. See if we can 1169 * progress further now that one more request 1170 * has completed and been returned to the 1171 * free pool. 1172 */ 1173 xbb_shutdown(xbb); 1174 } 1175 1176 if (wakeup != 0) 1177 taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); 1178 } 1179 1180 /** 1181 * Request resources and do basic request setup. 1182 * 1183 * \param xbb Per-instance xbb configuration structure. 1184 * \param reqlist Pointer to reqlist pointer. 1185 * \param ring_req Pointer to a block ring request. 1186 * \param ring_index The ring index of this request. 1187 * 1188 * \return 0 for success, non-zero for failure. 1189 */ 1190 static int 1191 xbb_get_resources(struct xbb_softc *xbb, struct xbb_xen_reqlist **reqlist, 1192 blkif_request_t *ring_req, RING_IDX ring_idx) 1193 { 1194 struct xbb_xen_reqlist *nreqlist; 1195 struct xbb_xen_req *nreq; 1196 1197 nreqlist = NULL; 1198 nreq = NULL; 1199 1200 mtx_lock(&xbb->lock); 1201 1202 /* 1203 * We don't allow new resources to be allocated if we're in the 1204 * process of shutting down. 1205 */ 1206 if ((xbb->flags & XBBF_SHUTDOWN) != 0) { 1207 mtx_unlock(&xbb->lock); 1208 return (1); 1209 } 1210 1211 /* 1212 * Allocate a reqlist if the caller doesn't have one already. 1213 */ 1214 if (*reqlist == NULL) { 1215 nreqlist = xbb_get_reqlist(xbb); 1216 if (nreqlist == NULL) 1217 goto bailout_error; 1218 } 1219 1220 /* We always allocate a request. 
*/ 1221 nreq = xbb_get_req(xbb); 1222 if (nreq == NULL) 1223 goto bailout_error; 1224 1225 mtx_unlock(&xbb->lock); 1226 1227 if (*reqlist == NULL) { 1228 *reqlist = nreqlist; 1229 nreqlist->operation = ring_req->operation; 1230 nreqlist->starting_sector_number = ring_req->sector_number; 1231 STAILQ_INSERT_TAIL(&xbb->reqlist_pending_stailq, nreqlist, 1232 links); 1233 } 1234 1235 nreq->reqlist = *reqlist; 1236 nreq->req_ring_idx = ring_idx; 1237 nreq->id = ring_req->id; 1238 nreq->operation = ring_req->operation; 1239 1240 if (xbb->abi != BLKIF_PROTOCOL_NATIVE) { 1241 bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req)); 1242 nreq->ring_req = &nreq->ring_req_storage; 1243 } else { 1244 nreq->ring_req = ring_req; 1245 } 1246 1247 binuptime(&nreq->ds_t0); 1248 devstat_start_transaction(xbb->xbb_stats_in, &nreq->ds_t0); 1249 STAILQ_INSERT_TAIL(&(*reqlist)->contig_req_list, nreq, links); 1250 (*reqlist)->num_children++; 1251 (*reqlist)->nr_segments += ring_req->nr_segments; 1252 1253 return (0); 1254 1255 bailout_error: 1256 1257 /* 1258 * We're out of resources, so set the shortage flag. The next time 1259 * a request is released, we'll try waking up the work thread to 1260 * see if we can allocate more resources. 1261 */ 1262 xbb->flags |= XBBF_RESOURCE_SHORTAGE; 1263 xbb->request_shortages++; 1264 1265 if (nreq != NULL) 1266 xbb_release_req(xbb, nreq); 1267 1268 if (nreqlist != NULL) 1269 xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0); 1270 1271 mtx_unlock(&xbb->lock); 1272 1273 return (1); 1274 } 1275 1276 /** 1277 * Create and queue a response to a blkif request. 1278 * 1279 * \param xbb Per-instance xbb configuration structure. 1280 * \param req The request structure to which to respond. 1281 * \param status The status code to report. See BLKIF_RSP_* 1282 * in sys/xen/interface/io/blkif.h. 1283 */ 1284 static void 1285 xbb_queue_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) 1286 { 1287 blkif_response_t *resp; 1288 1289 /* 1290 * The mutex is required here, and should be held across this call 1291 * until after the subsequent call to xbb_push_responses(). This 1292 * is to guarantee that another context won't queue responses and 1293 * push them while we're active. 1294 * 1295 * That could lead to the other end being notified of responses 1296 * before the resources have been freed on this end. The other end 1297 * would then be able to queue additional I/O, and we may run out 1298 * of resources because we haven't freed them all yet. 1299 */ 1300 mtx_assert(&xbb->lock, MA_OWNED); 1301 1302 /* 1303 * Place on the response ring for the relevant domain. 1304 * For now, only the spacing between entries is different 1305 * in the different ABIs, not the response entry layout. 
1306 */ 1307 switch (xbb->abi) { 1308 case BLKIF_PROTOCOL_NATIVE: 1309 resp = RING_GET_RESPONSE(&xbb->rings.native, 1310 xbb->rings.native.rsp_prod_pvt); 1311 break; 1312 case BLKIF_PROTOCOL_X86_32: 1313 resp = (blkif_response_t *) 1314 RING_GET_RESPONSE(&xbb->rings.x86_32, 1315 xbb->rings.x86_32.rsp_prod_pvt); 1316 break; 1317 case BLKIF_PROTOCOL_X86_64: 1318 resp = (blkif_response_t *) 1319 RING_GET_RESPONSE(&xbb->rings.x86_64, 1320 xbb->rings.x86_64.rsp_prod_pvt); 1321 break; 1322 default: 1323 panic("Unexpected blkif protocol ABI."); 1324 } 1325 1326 resp->id = req->id; 1327 resp->operation = req->operation; 1328 resp->status = status; 1329 1330 if (status != BLKIF_RSP_OKAY) 1331 xbb->reqs_completed_with_error++; 1332 1333 xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); 1334 1335 xbb->reqs_queued_for_completion++; 1336 1337 } 1338 1339 /** 1340 * Send queued responses to blkif requests. 1341 * 1342 * \param xbb Per-instance xbb configuration structure. 1343 * \param run_taskqueue Flag that is set to 1 if the taskqueue 1344 * should be run, 0 if it does not need to be run. 1345 * \param notify Flag that is set to 1 if the other end should be 1346 * notified via irq, 0 if the other end should not be 1347 * notified. 1348 */ 1349 static void 1350 xbb_push_responses(struct xbb_softc *xbb, int *run_taskqueue, int *notify) 1351 { 1352 int more_to_do; 1353 1354 /* 1355 * The mutex is required here. 1356 */ 1357 mtx_assert(&xbb->lock, MA_OWNED); 1358 1359 more_to_do = 0; 1360 1361 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, *notify); 1362 1363 if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) { 1364 1365 /* 1366 * Tail check for pending requests. Allows frontend to avoid 1367 * notifications if requests are already in flight (lower 1368 * overheads and promotes batching). 1369 */ 1370 RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, more_to_do); 1371 } else if (RING_HAS_UNCONSUMED_REQUESTS(&xbb->rings.common)) { 1372 1373 more_to_do = 1; 1374 } 1375 1376 xbb->reqs_completed += xbb->reqs_queued_for_completion; 1377 xbb->reqs_queued_for_completion = 0; 1378 1379 *run_taskqueue = more_to_do; 1380 } 1381 1382 /** 1383 * Complete a request list. 1384 * 1385 * \param xbb Per-instance xbb configuration structure. 1386 * \param reqlist Allocated internal request list structure. 1387 */ 1388 static void 1389 xbb_complete_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) 1390 { 1391 struct xbb_xen_req *nreq; 1392 off_t sectors_sent; 1393 int notify, run_taskqueue; 1394 1395 sectors_sent = 0; 1396 1397 if (reqlist->flags & XBB_REQLIST_MAPPED) 1398 xbb_unmap_reqlist(reqlist); 1399 1400 mtx_lock(&xbb->lock); 1401 1402 /* 1403 * All I/O is done, send the response. A lock is not necessary 1404 * to protect the request list, because all requests have 1405 * completed. Therefore this is the only context accessing this 1406 * reqlist right now. However, in order to make sure that no one 1407 * else queues responses onto the queue or pushes them to the other 1408 * side while we're active, we need to hold the lock across the 1409 * calls to xbb_queue_response() and xbb_push_responses(). 1410 */ 1411 STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { 1412 off_t cur_sectors_sent; 1413 1414 /* Put this response on the ring, but don't push yet */ 1415 xbb_queue_response(xbb, nreq, reqlist->status); 1416 1417 /* We don't report bytes sent if there is an error. 
*/ 1418 if (reqlist->status == BLKIF_RSP_OKAY) 1419 cur_sectors_sent = nreq->nr_512b_sectors; 1420 else 1421 cur_sectors_sent = 0; 1422 1423 sectors_sent += cur_sectors_sent; 1424 1425 devstat_end_transaction(xbb->xbb_stats_in, 1426 /*bytes*/cur_sectors_sent << 9, 1427 reqlist->ds_tag_type, 1428 reqlist->ds_trans_type, 1429 /*now*/NULL, 1430 /*then*/&nreq->ds_t0); 1431 } 1432 1433 /* 1434 * Take out any sectors not sent. If we wind up negative (which 1435 * might happen if an error is reported as well as a residual), just 1436 * report 0 sectors sent. 1437 */ 1438 sectors_sent -= reqlist->residual_512b_sectors; 1439 if (sectors_sent < 0) 1440 sectors_sent = 0; 1441 1442 devstat_end_transaction(xbb->xbb_stats, 1443 /*bytes*/ sectors_sent << 9, 1444 reqlist->ds_tag_type, 1445 reqlist->ds_trans_type, 1446 /*now*/NULL, 1447 /*then*/&reqlist->ds_t0); 1448 1449 xbb_release_reqlist(xbb, reqlist, /*wakeup*/ 1); 1450 1451 xbb_push_responses(xbb, &run_taskqueue, ¬ify); 1452 1453 mtx_unlock(&xbb->lock); 1454 1455 if (run_taskqueue) 1456 taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); 1457 1458 if (notify) 1459 xen_intr_signal(xbb->xen_intr_handle); 1460 } 1461 1462 /** 1463 * Completion handler for buffer I/O requests issued by the device 1464 * backend driver. 1465 * 1466 * \param bio The buffer I/O request on which to perform completion 1467 * processing. 1468 */ 1469 static void 1470 xbb_bio_done(struct bio *bio) 1471 { 1472 struct xbb_softc *xbb; 1473 struct xbb_xen_reqlist *reqlist; 1474 1475 reqlist = bio->bio_caller1; 1476 xbb = reqlist->xbb; 1477 1478 reqlist->residual_512b_sectors += bio->bio_resid >> 9; 1479 1480 /* 1481 * This is a bit imprecise. With aggregated I/O a single 1482 * request list can contain multiple front-end requests and 1483 * a multiple bios may point to a single request. By carefully 1484 * walking the request list, we could map residuals and errors 1485 * back to the original front-end request, but the interface 1486 * isn't sufficiently rich for us to properly report the error. 1487 * So, we just treat the entire request list as having failed if an 1488 * error occurs on any part. And, if an error occurs, we treat 1489 * the amount of data transferred as 0. 1490 * 1491 * For residuals, we report it on the overall aggregated device, 1492 * but not on the individual requests, since we don't currently 1493 * do the work to determine which front-end request to which the 1494 * residual applies. 1495 */ 1496 if (bio->bio_error) { 1497 DPRINTF("BIO returned error %d for operation on device %s\n", 1498 bio->bio_error, xbb->dev_name); 1499 reqlist->status = BLKIF_RSP_ERROR; 1500 1501 if (bio->bio_error == ENXIO 1502 && xenbus_get_state(xbb->dev) == XenbusStateConnected) { 1503 1504 /* 1505 * Backend device has disappeared. Signal the 1506 * front-end that we (the device proxy) want to 1507 * go away. 1508 */ 1509 xenbus_set_state(xbb->dev, XenbusStateClosing); 1510 } 1511 } 1512 1513 #ifdef XBB_USE_BOUNCE_BUFFERS 1514 if (bio->bio_cmd == BIO_READ) { 1515 vm_offset_t kva_offset; 1516 1517 kva_offset = (vm_offset_t)bio->bio_data 1518 - (vm_offset_t)reqlist->bounce; 1519 memcpy((uint8_t *)reqlist->kva + kva_offset, 1520 bio->bio_data, bio->bio_bcount); 1521 } 1522 #endif /* XBB_USE_BOUNCE_BUFFERS */ 1523 1524 /* 1525 * Decrement the pending count for the request list. When we're 1526 * done with the requests, send status back for all of them. 
1527 */ 1528 if (atomic_fetchadd_int(&reqlist->pendcnt, -1) == 1) 1529 xbb_complete_reqlist(xbb, reqlist); 1530 1531 g_destroy_bio(bio); 1532 } 1533 1534 /** 1535 * Parse a blkif request into an internal request structure and send 1536 * it to the backend for processing. 1537 * 1538 * \param xbb Per-instance xbb configuration structure. 1539 * \param reqlist Allocated internal request list structure. 1540 * 1541 * \return On success, 0. For resource shortages, non-zero. 1542 * 1543 * This routine performs the backend common aspects of request parsing 1544 * including compiling an internal request structure, parsing the S/G 1545 * list and any secondary ring requests in which they may reside, and 1546 * the mapping of front-end I/O pages into our domain. 1547 */ 1548 static int 1549 xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) 1550 { 1551 struct xbb_sg *xbb_sg; 1552 struct gnttab_map_grant_ref *map; 1553 struct blkif_request_segment *sg; 1554 struct blkif_request_segment *last_block_sg; 1555 struct xbb_xen_req *nreq; 1556 u_int nseg; 1557 u_int seg_idx; 1558 u_int block_segs; 1559 int nr_sects; 1560 int total_sects; 1561 int operation; 1562 uint8_t bio_flags; 1563 int error; 1564 1565 reqlist->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1566 bio_flags = 0; 1567 total_sects = 0; 1568 nr_sects = 0; 1569 1570 /* 1571 * First determine whether we have enough free KVA to satisfy this 1572 * request list. If not, tell xbb_run_queue() so it can go to 1573 * sleep until we have more KVA. 1574 */ 1575 reqlist->kva = NULL; 1576 if (reqlist->nr_segments != 0) { 1577 reqlist->kva = xbb_get_kva(xbb, reqlist->nr_segments); 1578 if (reqlist->kva == NULL) { 1579 /* 1580 * If we're out of KVA, return ENOMEM. 1581 */ 1582 return (ENOMEM); 1583 } 1584 } 1585 1586 binuptime(&reqlist->ds_t0); 1587 devstat_start_transaction(xbb->xbb_stats, &reqlist->ds_t0); 1588 1589 switch (reqlist->operation) { 1590 case BLKIF_OP_WRITE_BARRIER: 1591 bio_flags |= BIO_ORDERED; 1592 reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; 1593 /* FALLTHROUGH */ 1594 case BLKIF_OP_WRITE: 1595 operation = BIO_WRITE; 1596 reqlist->ds_trans_type = DEVSTAT_WRITE; 1597 if ((xbb->flags & XBBF_READ_ONLY) != 0) { 1598 DPRINTF("Attempt to write to read only device %s\n", 1599 xbb->dev_name); 1600 reqlist->status = BLKIF_RSP_ERROR; 1601 goto send_response; 1602 } 1603 break; 1604 case BLKIF_OP_READ: 1605 operation = BIO_READ; 1606 reqlist->ds_trans_type = DEVSTAT_READ; 1607 break; 1608 case BLKIF_OP_FLUSH_DISKCACHE: 1609 /* 1610 * If this is true, the user has requested that we disable 1611 * flush support. So we just complete the requests 1612 * successfully. 1613 */ 1614 if (xbb->disable_flush != 0) { 1615 goto send_response; 1616 } 1617 1618 /* 1619 * The user has requested that we only send a real flush 1620 * for every N flush requests. So keep count, and either 1621 * complete the request immediately or queue it for the 1622 * backend. 
1623 */ 1624 if (xbb->flush_interval != 0) { 1625 if (++(xbb->flush_count) < xbb->flush_interval) { 1626 goto send_response; 1627 } else 1628 xbb->flush_count = 0; 1629 } 1630 1631 operation = BIO_FLUSH; 1632 reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; 1633 reqlist->ds_trans_type = DEVSTAT_NO_DATA; 1634 goto do_dispatch; 1635 /*NOTREACHED*/ 1636 default: 1637 DPRINTF("error: unknown block io operation [%d]\n", 1638 reqlist->operation); 1639 reqlist->status = BLKIF_RSP_ERROR; 1640 goto send_response; 1641 } 1642 1643 reqlist->xbb = xbb; 1644 xbb_sg = xbb->xbb_sgs; 1645 map = xbb->maps; 1646 seg_idx = 0; 1647 1648 STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { 1649 blkif_request_t *ring_req; 1650 RING_IDX req_ring_idx; 1651 u_int req_seg_idx; 1652 1653 ring_req = nreq->ring_req; 1654 req_ring_idx = nreq->req_ring_idx; 1655 nr_sects = 0; 1656 nseg = ring_req->nr_segments; 1657 nreq->nr_pages = nseg; 1658 nreq->nr_512b_sectors = 0; 1659 req_seg_idx = 0; 1660 sg = NULL; 1661 1662 /* Check that number of segments is sane. */ 1663 if (__predict_false(nseg == 0) 1664 || __predict_false(nseg > xbb->max_request_segments)) { 1665 DPRINTF("Bad number of segments in request (%d)\n", 1666 nseg); 1667 reqlist->status = BLKIF_RSP_ERROR; 1668 goto send_response; 1669 } 1670 1671 block_segs = MIN(nreq->nr_pages, 1672 BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); 1673 sg = ring_req->seg; 1674 last_block_sg = sg + block_segs; 1675 while (1) { 1676 1677 while (sg < last_block_sg) { 1678 KASSERT(seg_idx < 1679 XBB_MAX_SEGMENTS_PER_REQLIST, 1680 ("seg_idx %d is too large, max " 1681 "segs %d\n", seg_idx, 1682 XBB_MAX_SEGMENTS_PER_REQLIST)); 1683 1684 xbb_sg->first_sect = sg->first_sect; 1685 xbb_sg->last_sect = sg->last_sect; 1686 xbb_sg->nsect = 1687 (int8_t)(sg->last_sect - 1688 sg->first_sect + 1); 1689 1690 if ((sg->last_sect >= (PAGE_SIZE >> 9)) 1691 || (xbb_sg->nsect <= 0)) { 1692 reqlist->status = BLKIF_RSP_ERROR; 1693 goto send_response; 1694 } 1695 1696 nr_sects += xbb_sg->nsect; 1697 map->host_addr = xbb_get_gntaddr(reqlist, 1698 seg_idx, /*sector*/0); 1699 KASSERT(map->host_addr + PAGE_SIZE <= 1700 xbb->ring_config.gnt_addr, 1701 ("Host address %#jx len %d overlaps " 1702 "ring address %#jx\n", 1703 (uintmax_t)map->host_addr, PAGE_SIZE, 1704 (uintmax_t)xbb->ring_config.gnt_addr)); 1705 1706 map->flags = GNTMAP_host_map; 1707 map->ref = sg->gref; 1708 map->dom = xbb->otherend_id; 1709 if (operation == BIO_WRITE) 1710 map->flags |= GNTMAP_readonly; 1711 sg++; 1712 map++; 1713 xbb_sg++; 1714 seg_idx++; 1715 req_seg_idx++; 1716 } 1717 1718 block_segs = MIN(nseg - req_seg_idx, 1719 BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); 1720 if (block_segs == 0) 1721 break; 1722 1723 /* 1724 * Fetch the next request block full of SG elements. 1725 * For now, only the spacing between entries is 1726 * different in the different ABIs, not the sg entry 1727 * layout. 
1728 */ 1729 req_ring_idx++; 1730 switch (xbb->abi) { 1731 case BLKIF_PROTOCOL_NATIVE: 1732 sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native, 1733 req_ring_idx); 1734 break; 1735 case BLKIF_PROTOCOL_X86_32: 1736 { 1737 sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32, 1738 req_ring_idx); 1739 break; 1740 } 1741 case BLKIF_PROTOCOL_X86_64: 1742 { 1743 sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64, 1744 req_ring_idx); 1745 break; 1746 } 1747 default: 1748 panic("Unexpected blkif protocol ABI."); 1749 /* NOTREACHED */ 1750 } 1751 last_block_sg = sg + block_segs; 1752 } 1753 1754 /* Convert to the disk's sector size */ 1755 nreq->nr_512b_sectors = nr_sects; 1756 nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; 1757 total_sects += nr_sects; 1758 1759 if ((nreq->nr_512b_sectors & 1760 ((xbb->sector_size >> 9) - 1)) != 0) { 1761 device_printf(xbb->dev, "%s: I/O size (%d) is not " 1762 "a multiple of the backing store sector " 1763 "size (%d)\n", __func__, 1764 nreq->nr_512b_sectors << 9, 1765 xbb->sector_size); 1766 reqlist->status = BLKIF_RSP_ERROR; 1767 goto send_response; 1768 } 1769 } 1770 1771 error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 1772 xbb->maps, reqlist->nr_segments); 1773 if (error != 0) 1774 panic("Grant table operation failed (%d)", error); 1775 1776 reqlist->flags |= XBB_REQLIST_MAPPED; 1777 1778 for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments; 1779 seg_idx++, map++){ 1780 1781 if (__predict_false(map->status != 0)) { 1782 DPRINTF("invalid buffer -- could not remap " 1783 "it (%d)\n", map->status); 1784 DPRINTF("Mapping(%d): Host Addr 0x%lx, flags " 1785 "0x%x ref 0x%x, dom %d\n", seg_idx, 1786 map->host_addr, map->flags, map->ref, 1787 map->dom); 1788 reqlist->status = BLKIF_RSP_ERROR; 1789 goto send_response; 1790 } 1791 1792 reqlist->gnt_handles[seg_idx] = map->handle; 1793 } 1794 if (reqlist->starting_sector_number + total_sects > 1795 xbb->media_num_sectors) { 1796 1797 DPRINTF("%s of [%" PRIu64 ",%" PRIu64 "] " 1798 "extends past end of device %s\n", 1799 operation == BIO_READ ? "read" : "write", 1800 reqlist->starting_sector_number, 1801 reqlist->starting_sector_number + total_sects, 1802 xbb->dev_name); 1803 reqlist->status = BLKIF_RSP_ERROR; 1804 goto send_response; 1805 } 1806 1807 do_dispatch: 1808 1809 error = xbb->dispatch_io(xbb, 1810 reqlist, 1811 operation, 1812 bio_flags); 1813 1814 if (error != 0) { 1815 reqlist->status = BLKIF_RSP_ERROR; 1816 goto send_response; 1817 } 1818 1819 return (0); 1820 1821 send_response: 1822 1823 xbb_complete_reqlist(xbb, reqlist); 1824 1825 return (0); 1826 } 1827 1828 static __inline int 1829 xbb_count_sects(blkif_request_t *ring_req) 1830 { 1831 int i; 1832 int cur_size = 0; 1833 1834 for (i = 0; i < ring_req->nr_segments; i++) { 1835 int nsect; 1836 1837 nsect = (int8_t)(ring_req->seg[i].last_sect - 1838 ring_req->seg[i].first_sect + 1); 1839 if (nsect <= 0) 1840 break; 1841 1842 cur_size += nsect; 1843 } 1844 1845 return (cur_size); 1846 } 1847 1848 /** 1849 * Process incoming requests from the shared communication ring in response 1850 * to a signal on the ring's event channel. 1851 * 1852 * \param context Callback argument registerd during task initialization - 1853 * the xbb_softc for this instance. 1854 * \param pending The number of taskqueue_enqueue events that have 1855 * occurred since this handler was last run. 
1856 */ 1857 static void 1858 xbb_run_queue(void *context, int pending) 1859 { 1860 struct xbb_softc *xbb; 1861 blkif_back_rings_t *rings; 1862 RING_IDX rp; 1863 uint64_t cur_sector; 1864 int cur_operation; 1865 struct xbb_xen_reqlist *reqlist; 1866 1867 1868 xbb = (struct xbb_softc *)context; 1869 rings = &xbb->rings; 1870 1871 /* 1872 * Work gather and dispatch loop. Note that we have a bias here 1873 * towards gathering I/O sent by blockfront. We first gather up 1874 * everything in the ring, as long as we have resources. Then we 1875 * dispatch one request, and then attempt to gather up any 1876 * additional requests that have come in while we were dispatching 1877 * the request. 1878 * 1879 * This allows us to get a clearer picture (via devstat) of how 1880 * many requests blockfront is queueing to us at any given time. 1881 */ 1882 for (;;) { 1883 int retval; 1884 1885 /* 1886 * Initialize reqlist to the last element in the pending 1887 * queue, if there is one. This allows us to add more 1888 * requests to that request list, if we have room. 1889 */ 1890 reqlist = STAILQ_LAST(&xbb->reqlist_pending_stailq, 1891 xbb_xen_reqlist, links); 1892 if (reqlist != NULL) { 1893 cur_sector = reqlist->next_contig_sector; 1894 cur_operation = reqlist->operation; 1895 } else { 1896 cur_operation = 0; 1897 cur_sector = 0; 1898 } 1899 1900 /* 1901 * Cache req_prod to avoid accessing a cache line shared 1902 * with the frontend. 1903 */ 1904 rp = rings->common.sring->req_prod; 1905 1906 /* Ensure we see queued requests up to 'rp'. */ 1907 rmb(); 1908 1909 /** 1910 * Run so long as there is work to consume and the generation 1911 * of a response will not overflow the ring. 1912 * 1913 * @note There's a 1 to 1 relationship between requests and 1914 * responses, so an overflow should never occur. This 1915 * test is to protect our domain from digesting bogus 1916 * data. Shouldn't we log this? 1917 */ 1918 while (rings->common.req_cons != rp 1919 && RING_REQUEST_CONS_OVERFLOW(&rings->common, 1920 rings->common.req_cons) == 0){ 1921 blkif_request_t ring_req_storage; 1922 blkif_request_t *ring_req; 1923 int cur_size; 1924 1925 switch (xbb->abi) { 1926 case BLKIF_PROTOCOL_NATIVE: 1927 ring_req = RING_GET_REQUEST(&xbb->rings.native, 1928 rings->common.req_cons); 1929 break; 1930 case BLKIF_PROTOCOL_X86_32: 1931 { 1932 struct blkif_x86_32_request *ring_req32; 1933 1934 ring_req32 = RING_GET_REQUEST( 1935 &xbb->rings.x86_32, rings->common.req_cons); 1936 blkif_get_x86_32_req(&ring_req_storage, 1937 ring_req32); 1938 ring_req = &ring_req_storage; 1939 break; 1940 } 1941 case BLKIF_PROTOCOL_X86_64: 1942 { 1943 struct blkif_x86_64_request *ring_req64; 1944 1945 ring_req64 =RING_GET_REQUEST(&xbb->rings.x86_64, 1946 rings->common.req_cons); 1947 blkif_get_x86_64_req(&ring_req_storage, 1948 ring_req64); 1949 ring_req = &ring_req_storage; 1950 break; 1951 } 1952 default: 1953 panic("Unexpected blkif protocol ABI."); 1954 /* NOTREACHED */ 1955 } 1956 1957 /* 1958 * Check for situations that would require closing 1959 * off this I/O for further coalescing: 1960 * - Coalescing is turned off. 1961 * - Current I/O is out of sequence with the previous 1962 * I/O. 1963 * - Coalesced I/O would be too large. 
1964 */ 1965 if ((reqlist != NULL) 1966 && ((xbb->no_coalesce_reqs != 0) 1967 || ((xbb->no_coalesce_reqs == 0) 1968 && ((ring_req->sector_number != cur_sector) 1969 || (ring_req->operation != cur_operation) 1970 || ((ring_req->nr_segments + reqlist->nr_segments) > 1971 xbb->max_reqlist_segments))))) { 1972 reqlist = NULL; 1973 } 1974 1975 /* 1976 * Grab and check for all resources in one shot. 1977 * If we can't get all of the resources we need, 1978 * the shortage is noted and the thread will get 1979 * woken up when more resources are available. 1980 */ 1981 retval = xbb_get_resources(xbb, &reqlist, ring_req, 1982 xbb->rings.common.req_cons); 1983 1984 if (retval != 0) { 1985 /* 1986 * Resource shortage has been recorded. 1987 * We'll be scheduled to run once a request 1988 * object frees up due to a completion. 1989 */ 1990 break; 1991 } 1992 1993 /* 1994 * Signify that we can overwrite this request with 1995 * a response by incrementing our consumer index. 1996 * The response won't be generated until after 1997 * we've already consumed all necessary data out 1998 * of the version of the request in the ring buffer 1999 * (for native mode). We must update the consumer 2000 * index before issueing back-end I/O so there is 2001 * no possibility that it will complete and a 2002 * response be generated before we make room in 2003 * the queue for that response. 2004 */ 2005 xbb->rings.common.req_cons += 2006 BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); 2007 xbb->reqs_received++; 2008 2009 cur_size = xbb_count_sects(ring_req); 2010 cur_sector = ring_req->sector_number + cur_size; 2011 reqlist->next_contig_sector = cur_sector; 2012 cur_operation = ring_req->operation; 2013 } 2014 2015 /* Check for I/O to dispatch */ 2016 reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); 2017 if (reqlist == NULL) { 2018 /* 2019 * We're out of work to do, put the task queue to 2020 * sleep. 2021 */ 2022 break; 2023 } 2024 2025 /* 2026 * Grab the first request off the queue and attempt 2027 * to dispatch it. 2028 */ 2029 STAILQ_REMOVE_HEAD(&xbb->reqlist_pending_stailq, links); 2030 2031 retval = xbb_dispatch_io(xbb, reqlist); 2032 if (retval != 0) { 2033 /* 2034 * xbb_dispatch_io() returns non-zero only when 2035 * there is a resource shortage. If that's the 2036 * case, re-queue this request on the head of the 2037 * queue, and go to sleep until we have more 2038 * resources. 2039 */ 2040 STAILQ_INSERT_HEAD(&xbb->reqlist_pending_stailq, 2041 reqlist, links); 2042 break; 2043 } else { 2044 /* 2045 * If we still have anything on the queue after 2046 * removing the head entry, that is because we 2047 * met one of the criteria to create a new 2048 * request list (outlined above), and we'll call 2049 * that a forced dispatch for statistical purposes. 2050 * 2051 * Otherwise, if there is only one element on the 2052 * queue, we coalesced everything available on 2053 * the ring and we'll call that a normal dispatch. 2054 */ 2055 reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); 2056 2057 if (reqlist != NULL) 2058 xbb->forced_dispatch++; 2059 else 2060 xbb->normal_dispatch++; 2061 2062 xbb->total_dispatch++; 2063 } 2064 } 2065 } 2066 2067 /** 2068 * Interrupt handler bound to the shared ring's event channel. 2069 * 2070 * \param arg Callback argument registerd during event channel 2071 * binding - the xbb_softc for this instance. 2072 */ 2073 static int 2074 xbb_filter(void *arg) 2075 { 2076 struct xbb_softc *xbb; 2077 2078 /* Defer to taskqueue thread. 
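 * This is a primary interrupt filter: it must not sleep, so it simply
 * enqueues the I/O task and lets xbb_run_queue() do the real work.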
*/ 2079 xbb = (struct xbb_softc *)arg; 2080 taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); 2081 2082 return (FILTER_HANDLED); 2083 } 2084 2085 SDT_PROVIDER_DEFINE(xbb); 2086 SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, "int"); 2087 SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, "int", "uint64_t", 2088 "uint64_t"); 2089 SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, "int", 2090 "uint64_t", "uint64_t"); 2091 2092 /*----------------------------- Backend Handlers -----------------------------*/ 2093 /** 2094 * Backend handler for character device access. 2095 * 2096 * \param xbb Per-instance xbb configuration structure. 2097 * \param reqlist Allocated internal request list structure. 2098 * \param operation BIO_* I/O operation code. 2099 * \param bio_flags Additional bio_flag data to pass to any generated 2100 * bios (e.g. BIO_ORDERED).. 2101 * 2102 * \return 0 for success, errno codes for failure. 2103 */ 2104 static int 2105 xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, 2106 int operation, int bio_flags) 2107 { 2108 struct xbb_dev_data *dev_data; 2109 struct bio *bios[XBB_MAX_SEGMENTS_PER_REQLIST]; 2110 off_t bio_offset; 2111 struct bio *bio; 2112 struct xbb_sg *xbb_sg; 2113 u_int nbio; 2114 u_int bio_idx; 2115 u_int nseg; 2116 u_int seg_idx; 2117 int error; 2118 2119 dev_data = &xbb->backend.dev; 2120 bio_offset = (off_t)reqlist->starting_sector_number 2121 << xbb->sector_size_shift; 2122 error = 0; 2123 nbio = 0; 2124 bio_idx = 0; 2125 2126 if (operation == BIO_FLUSH) { 2127 bio = g_new_bio(); 2128 if (__predict_false(bio == NULL)) { 2129 DPRINTF("Unable to allocate bio for BIO_FLUSH\n"); 2130 error = ENOMEM; 2131 return (error); 2132 } 2133 2134 bio->bio_cmd = BIO_FLUSH; 2135 bio->bio_flags |= BIO_ORDERED; 2136 bio->bio_dev = dev_data->cdev; 2137 bio->bio_offset = 0; 2138 bio->bio_data = 0; 2139 bio->bio_done = xbb_bio_done; 2140 bio->bio_caller1 = reqlist; 2141 bio->bio_pblkno = 0; 2142 2143 reqlist->pendcnt = 1; 2144 2145 SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush, 2146 device_get_unit(xbb->dev)); 2147 2148 (*dev_data->csw->d_strategy)(bio); 2149 2150 return (0); 2151 } 2152 2153 xbb_sg = xbb->xbb_sgs; 2154 bio = NULL; 2155 nseg = reqlist->nr_segments; 2156 2157 for (seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { 2158 2159 /* 2160 * KVA will not be contiguous, so any additional 2161 * I/O will need to be represented in a new bio. 2162 */ 2163 if ((bio != NULL) 2164 && (xbb_sg->first_sect != 0)) { 2165 if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { 2166 printf("%s: Discontiguous I/O request " 2167 "from domain %d ends on " 2168 "non-sector boundary\n", 2169 __func__, xbb->otherend_id); 2170 error = EINVAL; 2171 goto fail_free_bios; 2172 } 2173 bio = NULL; 2174 } 2175 2176 if (bio == NULL) { 2177 /* 2178 * Make sure that the start of this bio is 2179 * aligned to a device sector. 
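 * The (sector_size - 1) mask tests here and below rely on the backing
 * device's sector size being a power of two, which is also what lets
 * xbb_open_backend() derive sector_size_shift with fls().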
2180 */ 2181 if ((bio_offset & (xbb->sector_size - 1)) != 0){ 2182 printf("%s: Misaligned I/O request " 2183 "from domain %d\n", __func__, 2184 xbb->otherend_id); 2185 error = EINVAL; 2186 goto fail_free_bios; 2187 } 2188 2189 bio = bios[nbio++] = g_new_bio(); 2190 if (__predict_false(bio == NULL)) { 2191 error = ENOMEM; 2192 goto fail_free_bios; 2193 } 2194 bio->bio_cmd = operation; 2195 bio->bio_flags |= bio_flags; 2196 bio->bio_dev = dev_data->cdev; 2197 bio->bio_offset = bio_offset; 2198 bio->bio_data = xbb_reqlist_ioaddr(reqlist, seg_idx, 2199 xbb_sg->first_sect); 2200 bio->bio_done = xbb_bio_done; 2201 bio->bio_caller1 = reqlist; 2202 bio->bio_pblkno = bio_offset >> xbb->sector_size_shift; 2203 } 2204 2205 bio->bio_length += xbb_sg->nsect << 9; 2206 bio->bio_bcount = bio->bio_length; 2207 bio_offset += xbb_sg->nsect << 9; 2208 2209 if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) { 2210 2211 if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { 2212 printf("%s: Discontiguous I/O request " 2213 "from domain %d ends on " 2214 "non-sector boundary\n", 2215 __func__, xbb->otherend_id); 2216 error = EINVAL; 2217 goto fail_free_bios; 2218 } 2219 /* 2220 * KVA will not be contiguous, so any additional 2221 * I/O will need to be represented in a new bio. 2222 */ 2223 bio = NULL; 2224 } 2225 } 2226 2227 reqlist->pendcnt = nbio; 2228 2229 for (bio_idx = 0; bio_idx < nbio; bio_idx++) 2230 { 2231 #ifdef XBB_USE_BOUNCE_BUFFERS 2232 vm_offset_t kva_offset; 2233 2234 kva_offset = (vm_offset_t)bios[bio_idx]->bio_data 2235 - (vm_offset_t)reqlist->bounce; 2236 if (operation == BIO_WRITE) { 2237 memcpy(bios[bio_idx]->bio_data, 2238 (uint8_t *)reqlist->kva + kva_offset, 2239 bios[bio_idx]->bio_bcount); 2240 } 2241 #endif 2242 if (operation == BIO_READ) { 2243 SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read, 2244 device_get_unit(xbb->dev), 2245 bios[bio_idx]->bio_offset, 2246 bios[bio_idx]->bio_length); 2247 } else if (operation == BIO_WRITE) { 2248 SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, write, 2249 device_get_unit(xbb->dev), 2250 bios[bio_idx]->bio_offset, 2251 bios[bio_idx]->bio_length); 2252 } 2253 (*dev_data->csw->d_strategy)(bios[bio_idx]); 2254 } 2255 2256 return (error); 2257 2258 fail_free_bios: 2259 for (bio_idx = 0; bio_idx < (nbio-1); bio_idx++) 2260 g_destroy_bio(bios[bio_idx]); 2261 2262 return (error); 2263 } 2264 2265 SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, "int"); 2266 SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, "int", "uint64_t", 2267 "uint64_t"); 2268 SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, "int", 2269 "uint64_t", "uint64_t"); 2270 2271 /** 2272 * Backend handler for file access. 2273 * 2274 * \param xbb Per-instance xbb configuration structure. 2275 * \param reqlist Allocated internal request list. 2276 * \param operation BIO_* I/O operation code. 2277 * \param flags Additional bio_flag data to pass to any generated bios 2278 * (e.g. BIO_ORDERED).. 2279 * 2280 * \return 0 for success, errno codes for failure. 
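 *
 * \note In practice this handler always returns 0; per-request
 *       failures are reported to the front-end by setting
 *       reqlist->status to BLKIF_RSP_ERROR before calling
 *       xbb_complete_reqlist().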
2281 */ 2282 static int 2283 xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, 2284 int operation, int flags) 2285 { 2286 struct xbb_file_data *file_data; 2287 u_int seg_idx; 2288 u_int nseg; 2289 off_t sectors_sent; 2290 struct uio xuio; 2291 struct xbb_sg *xbb_sg; 2292 struct iovec *xiovec; 2293 #ifdef XBB_USE_BOUNCE_BUFFERS 2294 void **p_vaddr; 2295 int saved_uio_iovcnt; 2296 #endif /* XBB_USE_BOUNCE_BUFFERS */ 2297 int error; 2298 2299 file_data = &xbb->backend.file; 2300 sectors_sent = 0; 2301 error = 0; 2302 bzero(&xuio, sizeof(xuio)); 2303 2304 switch (operation) { 2305 case BIO_READ: 2306 xuio.uio_rw = UIO_READ; 2307 break; 2308 case BIO_WRITE: 2309 xuio.uio_rw = UIO_WRITE; 2310 break; 2311 case BIO_FLUSH: { 2312 struct mount *mountpoint; 2313 2314 SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush, 2315 device_get_unit(xbb->dev)); 2316 2317 (void) vn_start_write(xbb->vn, &mountpoint, V_WAIT); 2318 2319 vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); 2320 error = VOP_FSYNC(xbb->vn, MNT_WAIT, curthread); 2321 VOP_UNLOCK(xbb->vn, 0); 2322 2323 vn_finished_write(mountpoint); 2324 2325 goto bailout_send_response; 2326 /* NOTREACHED */ 2327 } 2328 default: 2329 panic("invalid operation %d", operation); 2330 /* NOTREACHED */ 2331 } 2332 xuio.uio_offset = (vm_offset_t)reqlist->starting_sector_number 2333 << xbb->sector_size_shift; 2334 xuio.uio_segflg = UIO_SYSSPACE; 2335 xuio.uio_iov = file_data->xiovecs; 2336 xuio.uio_iovcnt = 0; 2337 xbb_sg = xbb->xbb_sgs; 2338 nseg = reqlist->nr_segments; 2339 2340 for (xiovec = NULL, seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { 2341 2342 /* 2343 * If the first sector is not 0, the KVA will 2344 * not be contiguous and we'll need to go on 2345 * to another segment. 2346 */ 2347 if (xbb_sg->first_sect != 0) 2348 xiovec = NULL; 2349 2350 if (xiovec == NULL) { 2351 xiovec = &file_data->xiovecs[xuio.uio_iovcnt]; 2352 xiovec->iov_base = xbb_reqlist_ioaddr(reqlist, 2353 seg_idx, xbb_sg->first_sect); 2354 #ifdef XBB_USE_BOUNCE_BUFFERS 2355 /* 2356 * Store the address of the incoming 2357 * buffer at this particular offset 2358 * as well, so we can do the copy 2359 * later without having to do more 2360 * work to recalculate this address. 2361 */ 2362 p_vaddr = &file_data->xiovecs_vaddr[xuio.uio_iovcnt]; 2363 *p_vaddr = xbb_reqlist_vaddr(reqlist, seg_idx, 2364 xbb_sg->first_sect); 2365 #endif /* XBB_USE_BOUNCE_BUFFERS */ 2366 xiovec->iov_len = 0; 2367 xuio.uio_iovcnt++; 2368 } 2369 2370 xiovec->iov_len += xbb_sg->nsect << 9; 2371 2372 xuio.uio_resid += xbb_sg->nsect << 9; 2373 2374 /* 2375 * If the last sector is not the full page 2376 * size count, the next segment will not be 2377 * contiguous in KVA and we need a new iovec. 2378 */ 2379 if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) 2380 xiovec = NULL; 2381 } 2382 2383 xuio.uio_td = curthread; 2384 2385 #ifdef XBB_USE_BOUNCE_BUFFERS 2386 saved_uio_iovcnt = xuio.uio_iovcnt; 2387 2388 if (operation == BIO_WRITE) { 2389 /* Copy the write data to the local buffer. */ 2390 for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, 2391 xiovec = xuio.uio_iov; seg_idx < xuio.uio_iovcnt; 2392 seg_idx++, xiovec++, p_vaddr++) { 2393 2394 memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len); 2395 } 2396 } else { 2397 /* 2398 * We only need to save off the iovecs in the case of a 2399 * read, because the copy for the read happens after the 2400 * VOP_READ(). (The uio will get modified in that call 2401 * sequence.) 
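 * For writes, by contrast, the data has already been staged from the
 * mapped front-end pages into the local bounce buffer above, before
 * VOP_WRITE() consumes the uio, so no saved copy is needed.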
2402 */ 2403 memcpy(file_data->saved_xiovecs, xuio.uio_iov, 2404 xuio.uio_iovcnt * sizeof(xuio.uio_iov[0])); 2405 } 2406 #endif /* XBB_USE_BOUNCE_BUFFERS */ 2407 2408 switch (operation) { 2409 case BIO_READ: 2410 2411 SDT_PROBE3(xbb, kernel, xbb_dispatch_file, read, 2412 device_get_unit(xbb->dev), xuio.uio_offset, 2413 xuio.uio_resid); 2414 2415 vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); 2416 2417 /* 2418 * UFS pays attention to IO_DIRECT for reads. If the 2419 * DIRECTIO option is configured into the kernel, it calls 2420 * ffs_rawread(). But that only works for single-segment 2421 * uios with user space addresses. In our case, with a 2422 * kernel uio, it still reads into the buffer cache, but it 2423 * will just try to release the buffer from the cache later 2424 * on in ffs_read(). 2425 * 2426 * ZFS does not pay attention to IO_DIRECT for reads. 2427 * 2428 * UFS does not pay attention to IO_SYNC for reads. 2429 * 2430 * ZFS pays attention to IO_SYNC (which translates into the 2431 * Solaris define FRSYNC for zfs_read()) for reads. It 2432 * attempts to sync the file before reading. 2433 * 2434 * So, to attempt to provide some barrier semantics in the 2435 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. 2436 */ 2437 error = VOP_READ(xbb->vn, &xuio, (flags & BIO_ORDERED) ? 2438 (IO_DIRECT|IO_SYNC) : 0, file_data->cred); 2439 2440 VOP_UNLOCK(xbb->vn, 0); 2441 break; 2442 case BIO_WRITE: { 2443 struct mount *mountpoint; 2444 2445 SDT_PROBE3(xbb, kernel, xbb_dispatch_file, write, 2446 device_get_unit(xbb->dev), xuio.uio_offset, 2447 xuio.uio_resid); 2448 2449 (void)vn_start_write(xbb->vn, &mountpoint, V_WAIT); 2450 2451 vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); 2452 2453 /* 2454 * UFS pays attention to IO_DIRECT for writes. The write 2455 * is done asynchronously. (Normally the write would just 2456 * get put into cache. 2457 * 2458 * UFS pays attention to IO_SYNC for writes. It will 2459 * attempt to write the buffer out synchronously if that 2460 * flag is set. 2461 * 2462 * ZFS does not pay attention to IO_DIRECT for writes. 2463 * 2464 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 2465 * for writes. It will flush the transaction from the 2466 * cache before returning. 2467 * 2468 * So if we've got the BIO_ORDERED flag set, we want 2469 * IO_SYNC in either the UFS or ZFS case. 2470 */ 2471 error = VOP_WRITE(xbb->vn, &xuio, (flags & BIO_ORDERED) ? 2472 IO_SYNC : 0, file_data->cred); 2473 VOP_UNLOCK(xbb->vn, 0); 2474 2475 vn_finished_write(mountpoint); 2476 2477 break; 2478 } 2479 default: 2480 panic("invalid operation %d", operation); 2481 /* NOTREACHED */ 2482 } 2483 2484 #ifdef XBB_USE_BOUNCE_BUFFERS 2485 /* We only need to copy here for read operations */ 2486 if (operation == BIO_READ) { 2487 2488 for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, 2489 xiovec = file_data->saved_xiovecs; 2490 seg_idx < saved_uio_iovcnt; seg_idx++, 2491 xiovec++, p_vaddr++) { 2492 2493 /* 2494 * Note that we have to use the copy of the 2495 * io vector we made above. uiomove() modifies 2496 * the uio and its referenced vector as uiomove 2497 * performs the copy, so we can't rely on any 2498 * state from the original uio. 
2499 */ 2500 memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len); 2501 } 2502 } 2503 #endif /* XBB_USE_BOUNCE_BUFFERS */ 2504 2505 bailout_send_response: 2506 2507 if (error != 0) 2508 reqlist->status = BLKIF_RSP_ERROR; 2509 2510 xbb_complete_reqlist(xbb, reqlist); 2511 2512 return (0); 2513 } 2514 2515 /*--------------------------- Backend Configuration --------------------------*/ 2516 /** 2517 * Close and cleanup any backend device/file specific state for this 2518 * block back instance. 2519 * 2520 * \param xbb Per-instance xbb configuration structure. 2521 */ 2522 static void 2523 xbb_close_backend(struct xbb_softc *xbb) 2524 { 2525 DROP_GIANT(); 2526 DPRINTF("closing dev=%s\n", xbb->dev_name); 2527 if (xbb->vn) { 2528 int flags = FREAD; 2529 2530 if ((xbb->flags & XBBF_READ_ONLY) == 0) 2531 flags |= FWRITE; 2532 2533 switch (xbb->device_type) { 2534 case XBB_TYPE_DISK: 2535 if (xbb->backend.dev.csw) { 2536 dev_relthread(xbb->backend.dev.cdev, 2537 xbb->backend.dev.dev_ref); 2538 xbb->backend.dev.csw = NULL; 2539 xbb->backend.dev.cdev = NULL; 2540 } 2541 break; 2542 case XBB_TYPE_FILE: 2543 break; 2544 case XBB_TYPE_NONE: 2545 default: 2546 panic("Unexpected backend type."); 2547 break; 2548 } 2549 2550 (void)vn_close(xbb->vn, flags, NOCRED, curthread); 2551 xbb->vn = NULL; 2552 2553 switch (xbb->device_type) { 2554 case XBB_TYPE_DISK: 2555 break; 2556 case XBB_TYPE_FILE: 2557 if (xbb->backend.file.cred != NULL) { 2558 crfree(xbb->backend.file.cred); 2559 xbb->backend.file.cred = NULL; 2560 } 2561 break; 2562 case XBB_TYPE_NONE: 2563 default: 2564 panic("Unexpected backend type."); 2565 break; 2566 } 2567 } 2568 PICKUP_GIANT(); 2569 } 2570 2571 /** 2572 * Open a character device to be used for backend I/O. 2573 * 2574 * \param xbb Per-instance xbb configuration structure. 2575 * 2576 * \return 0 for success, errno codes for failure. 2577 */ 2578 static int 2579 xbb_open_dev(struct xbb_softc *xbb) 2580 { 2581 struct vattr vattr; 2582 struct cdev *dev; 2583 struct cdevsw *devsw; 2584 int error; 2585 2586 xbb->device_type = XBB_TYPE_DISK; 2587 xbb->dispatch_io = xbb_dispatch_dev; 2588 xbb->backend.dev.cdev = xbb->vn->v_rdev; 2589 xbb->backend.dev.csw = dev_refthread(xbb->backend.dev.cdev, 2590 &xbb->backend.dev.dev_ref); 2591 if (xbb->backend.dev.csw == NULL) 2592 panic("Unable to retrieve device switch"); 2593 2594 error = VOP_GETATTR(xbb->vn, &vattr, NOCRED); 2595 if (error) { 2596 xenbus_dev_fatal(xbb->dev, error, "error getting " 2597 "vnode attributes for device %s", 2598 xbb->dev_name); 2599 return (error); 2600 } 2601 2602 2603 dev = xbb->vn->v_rdev; 2604 devsw = dev->si_devsw; 2605 if (!devsw->d_ioctl) { 2606 xenbus_dev_fatal(xbb->dev, ENODEV, "no d_ioctl for " 2607 "device %s!", xbb->dev_name); 2608 return (ENODEV); 2609 } 2610 2611 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, 2612 (caddr_t)&xbb->sector_size, FREAD, 2613 curthread); 2614 if (error) { 2615 xenbus_dev_fatal(xbb->dev, error, 2616 "error calling ioctl DIOCGSECTORSIZE " 2617 "for device %s", xbb->dev_name); 2618 return (error); 2619 } 2620 2621 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, 2622 (caddr_t)&xbb->media_size, FREAD, 2623 curthread); 2624 if (error) { 2625 xenbus_dev_fatal(xbb->dev, error, 2626 "error calling ioctl DIOCGMEDIASIZE " 2627 "for device %s", xbb->dev_name); 2628 return (error); 2629 } 2630 2631 return (0); 2632 } 2633 2634 /** 2635 * Open a file to be used for backend I/O. 2636 * 2637 * \param xbb Per-instance xbb configuration structure. 
2638 * 2639 * \return 0 for success, errno codes for failure. 2640 */ 2641 static int 2642 xbb_open_file(struct xbb_softc *xbb) 2643 { 2644 struct xbb_file_data *file_data; 2645 struct vattr vattr; 2646 int error; 2647 2648 file_data = &xbb->backend.file; 2649 xbb->device_type = XBB_TYPE_FILE; 2650 xbb->dispatch_io = xbb_dispatch_file; 2651 error = VOP_GETATTR(xbb->vn, &vattr, curthread->td_ucred); 2652 if (error != 0) { 2653 xenbus_dev_fatal(xbb->dev, error, 2654 "error calling VOP_GETATTR()" 2655 "for file %s", xbb->dev_name); 2656 return (error); 2657 } 2658 2659 /* 2660 * Verify that we have the ability to upgrade to exclusive 2661 * access on this file so we can trap errors at open instead 2662 * of reporting them during first access. 2663 */ 2664 if (VOP_ISLOCKED(xbb->vn) != LK_EXCLUSIVE) { 2665 vn_lock(xbb->vn, LK_UPGRADE | LK_RETRY); 2666 if (xbb->vn->v_iflag & VI_DOOMED) { 2667 error = EBADF; 2668 xenbus_dev_fatal(xbb->dev, error, 2669 "error locking file %s", 2670 xbb->dev_name); 2671 2672 return (error); 2673 } 2674 } 2675 2676 file_data->cred = crhold(curthread->td_ucred); 2677 xbb->media_size = vattr.va_size; 2678 2679 /* 2680 * XXX KDM vattr.va_blocksize may be larger than 512 bytes here. 2681 * With ZFS, it is 131072 bytes. Block sizes that large don't work 2682 * with disklabel and UFS on FreeBSD at least. Large block sizes 2683 * may not work with other OSes as well. So just export a sector 2684 * size of 512 bytes, which should work with any OS or 2685 * application. Since our backing is a file, any block size will 2686 * work fine for the backing store. 2687 */ 2688 #if 0 2689 xbb->sector_size = vattr.va_blocksize; 2690 #endif 2691 xbb->sector_size = 512; 2692 2693 /* 2694 * Sanity check. The media size has to be at least one 2695 * sector long. 2696 */ 2697 if (xbb->media_size < xbb->sector_size) { 2698 error = EINVAL; 2699 xenbus_dev_fatal(xbb->dev, error, 2700 "file %s size %ju < block size %u", 2701 xbb->dev_name, 2702 (uintmax_t)xbb->media_size, 2703 xbb->sector_size); 2704 } 2705 return (error); 2706 } 2707 2708 /** 2709 * Open the backend provider for this connection. 2710 * 2711 * \param xbb Per-instance xbb configuration structure. 2712 * 2713 * \return 0 for success, errno codes for failure. 2714 */ 2715 static int 2716 xbb_open_backend(struct xbb_softc *xbb) 2717 { 2718 struct nameidata nd; 2719 int flags; 2720 int error; 2721 2722 flags = FREAD; 2723 error = 0; 2724 2725 DPRINTF("opening dev=%s\n", xbb->dev_name); 2726 2727 if (rootvnode == NULL) { 2728 xenbus_dev_fatal(xbb->dev, ENOENT, 2729 "Root file system not mounted"); 2730 return (ENOENT); 2731 } 2732 2733 if ((xbb->flags & XBBF_READ_ONLY) == 0) 2734 flags |= FWRITE; 2735 2736 if (!curthread->td_proc->p_fd->fd_cdir) { 2737 curthread->td_proc->p_fd->fd_cdir = rootvnode; 2738 VREF(rootvnode); 2739 } 2740 if (!curthread->td_proc->p_fd->fd_rdir) { 2741 curthread->td_proc->p_fd->fd_rdir = rootvnode; 2742 VREF(rootvnode); 2743 } 2744 if (!curthread->td_proc->p_fd->fd_jdir) { 2745 curthread->td_proc->p_fd->fd_jdir = rootvnode; 2746 VREF(rootvnode); 2747 } 2748 2749 again: 2750 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, xbb->dev_name, curthread); 2751 error = vn_open(&nd, &flags, 0, NULL); 2752 if (error) { 2753 /* 2754 * This is the only reasonable guess we can make as far as 2755 * path if the user doesn't give us a fully qualified path. 2756 * If they want to specify a file, they need to specify the 2757 * full path. 
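 * For example, a relative "params" value such as ada0 would be
 * retried below as /dev/ada0 (the specific device name here is
 * purely illustrative).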
2758 */ 2759 if (xbb->dev_name[0] != '/') { 2760 char *dev_path = "/dev/"; 2761 char *dev_name; 2762 2763 /* Try adding device path at beginning of name */ 2764 dev_name = malloc(strlen(xbb->dev_name) 2765 + strlen(dev_path) + 1, 2766 M_XENBLOCKBACK, M_NOWAIT); 2767 if (dev_name) { 2768 sprintf(dev_name, "%s%s", dev_path, 2769 xbb->dev_name); 2770 free(xbb->dev_name, M_XENBLOCKBACK); 2771 xbb->dev_name = dev_name; 2772 goto again; 2773 } 2774 } 2775 xenbus_dev_fatal(xbb->dev, error, "error opening device %s", 2776 xbb->dev_name); 2777 return (error); 2778 } 2779 2780 NDFREE(&nd, NDF_ONLY_PNBUF); 2781 2782 xbb->vn = nd.ni_vp; 2783 2784 /* We only support disks and files. */ 2785 if (vn_isdisk(xbb->vn, &error)) { 2786 error = xbb_open_dev(xbb); 2787 } else if (xbb->vn->v_type == VREG) { 2788 error = xbb_open_file(xbb); 2789 } else { 2790 error = EINVAL; 2791 xenbus_dev_fatal(xbb->dev, error, "%s is not a disk " 2792 "or file", xbb->dev_name); 2793 } 2794 VOP_UNLOCK(xbb->vn, 0); 2795 2796 if (error != 0) { 2797 xbb_close_backend(xbb); 2798 return (error); 2799 } 2800 2801 xbb->sector_size_shift = fls(xbb->sector_size) - 1; 2802 xbb->media_num_sectors = xbb->media_size >> xbb->sector_size_shift; 2803 2804 DPRINTF("opened %s=%s sector_size=%u media_size=%" PRId64 "\n", 2805 (xbb->device_type == XBB_TYPE_DISK) ? "dev" : "file", 2806 xbb->dev_name, xbb->sector_size, xbb->media_size); 2807 2808 return (0); 2809 } 2810 2811 /*------------------------ Inter-Domain Communication ------------------------*/ 2812 /** 2813 * Free dynamically allocated KVA or pseudo-physical address allocations. 2814 * 2815 * \param xbb Per-instance xbb configuration structure. 2816 */ 2817 static void 2818 xbb_free_communication_mem(struct xbb_softc *xbb) 2819 { 2820 if (xbb->kva != 0) { 2821 #ifndef XENHVM 2822 kva_free(xbb->kva, xbb->kva_size); 2823 #else 2824 if (xbb->pseudo_phys_res != NULL) { 2825 bus_release_resource(xbb->dev, SYS_RES_MEMORY, 2826 xbb->pseudo_phys_res_id, 2827 xbb->pseudo_phys_res); 2828 xbb->pseudo_phys_res = NULL; 2829 } 2830 #endif 2831 } 2832 xbb->kva = 0; 2833 xbb->gnt_base_addr = 0; 2834 if (xbb->kva_free != NULL) { 2835 free(xbb->kva_free, M_XENBLOCKBACK); 2836 xbb->kva_free = NULL; 2837 } 2838 } 2839 2840 /** 2841 * Cleanup all inter-domain communication mechanisms. 2842 * 2843 * \param xbb Per-instance xbb configuration structure. 2844 */ 2845 static int 2846 xbb_disconnect(struct xbb_softc *xbb) 2847 { 2848 struct gnttab_unmap_grant_ref ops[XBB_MAX_RING_PAGES]; 2849 struct gnttab_unmap_grant_ref *op; 2850 u_int ring_idx; 2851 int error; 2852 2853 DPRINTF("\n"); 2854 2855 if ((xbb->flags & XBBF_RING_CONNECTED) == 0) 2856 return (0); 2857 2858 xen_intr_unbind(&xbb->xen_intr_handle); 2859 2860 mtx_unlock(&xbb->lock); 2861 taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); 2862 mtx_lock(&xbb->lock); 2863 2864 /* 2865 * No new interrupts can generate work, but we must wait 2866 * for all currently active requests to drain. 
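 * Returning EAGAIN tells xbb_shutdown() that teardown is not yet
 * complete; the shutdown path is retried once the outstanding
 * requests finish.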
2867 */ 2868 if (xbb->active_request_count != 0) 2869 return (EAGAIN); 2870 2871 for (ring_idx = 0, op = ops; 2872 ring_idx < xbb->ring_config.ring_pages; 2873 ring_idx++, op++) { 2874 2875 op->host_addr = xbb->ring_config.gnt_addr 2876 + (ring_idx * PAGE_SIZE); 2877 op->dev_bus_addr = xbb->ring_config.bus_addr[ring_idx]; 2878 op->handle = xbb->ring_config.handle[ring_idx]; 2879 } 2880 2881 error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, ops, 2882 xbb->ring_config.ring_pages); 2883 if (error != 0) 2884 panic("Grant table op failed (%d)", error); 2885 2886 xbb_free_communication_mem(xbb); 2887 2888 if (xbb->requests != NULL) { 2889 free(xbb->requests, M_XENBLOCKBACK); 2890 xbb->requests = NULL; 2891 } 2892 2893 if (xbb->request_lists != NULL) { 2894 struct xbb_xen_reqlist *reqlist; 2895 int i; 2896 2897 /* There is one request list for ever allocated request. */ 2898 for (i = 0, reqlist = xbb->request_lists; 2899 i < xbb->max_requests; i++, reqlist++){ 2900 #ifdef XBB_USE_BOUNCE_BUFFERS 2901 if (reqlist->bounce != NULL) { 2902 free(reqlist->bounce, M_XENBLOCKBACK); 2903 reqlist->bounce = NULL; 2904 } 2905 #endif 2906 if (reqlist->gnt_handles != NULL) { 2907 free(reqlist->gnt_handles, M_XENBLOCKBACK); 2908 reqlist->gnt_handles = NULL; 2909 } 2910 } 2911 free(xbb->request_lists, M_XENBLOCKBACK); 2912 xbb->request_lists = NULL; 2913 } 2914 2915 xbb->flags &= ~XBBF_RING_CONNECTED; 2916 return (0); 2917 } 2918 2919 /** 2920 * Map shared memory ring into domain local address space, initialize 2921 * ring control structures, and bind an interrupt to the event channel 2922 * used to notify us of ring changes. 2923 * 2924 * \param xbb Per-instance xbb configuration structure. 2925 */ 2926 static int 2927 xbb_connect_ring(struct xbb_softc *xbb) 2928 { 2929 struct gnttab_map_grant_ref gnts[XBB_MAX_RING_PAGES]; 2930 struct gnttab_map_grant_ref *gnt; 2931 u_int ring_idx; 2932 int error; 2933 2934 if ((xbb->flags & XBBF_RING_CONNECTED) != 0) 2935 return (0); 2936 2937 /* 2938 * Kva for our ring is at the tail of the region of kva allocated 2939 * by xbb_alloc_communication_mem(). 2940 */ 2941 xbb->ring_config.va = xbb->kva 2942 + (xbb->kva_size 2943 - (xbb->ring_config.ring_pages * PAGE_SIZE)); 2944 xbb->ring_config.gnt_addr = xbb->gnt_base_addr 2945 + (xbb->kva_size 2946 - (xbb->ring_config.ring_pages * PAGE_SIZE)); 2947 2948 for (ring_idx = 0, gnt = gnts; 2949 ring_idx < xbb->ring_config.ring_pages; 2950 ring_idx++, gnt++) { 2951 2952 gnt->host_addr = xbb->ring_config.gnt_addr 2953 + (ring_idx * PAGE_SIZE); 2954 gnt->flags = GNTMAP_host_map; 2955 gnt->ref = xbb->ring_config.ring_ref[ring_idx]; 2956 gnt->dom = xbb->otherend_id; 2957 } 2958 2959 error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, gnts, 2960 xbb->ring_config.ring_pages); 2961 if (error) 2962 panic("blkback: Ring page grant table op failed (%d)", error); 2963 2964 for (ring_idx = 0, gnt = gnts; 2965 ring_idx < xbb->ring_config.ring_pages; 2966 ring_idx++, gnt++) { 2967 if (gnt->status != 0) { 2968 xbb->ring_config.va = 0; 2969 xenbus_dev_fatal(xbb->dev, EACCES, 2970 "Ring shared page mapping failed. " 2971 "Status %d.", gnt->status); 2972 return (EACCES); 2973 } 2974 xbb->ring_config.handle[ring_idx] = gnt->handle; 2975 xbb->ring_config.bus_addr[ring_idx] = gnt->dev_bus_addr; 2976 } 2977 2978 /* Initialize the ring based on ABI. 
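 * All three ABIs share the same producer/consumer ring protocol; they
 * differ mainly in the layout (size and padding) of the request
 * structures published by 32-bit and 64-bit front-ends.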
*/ 2979 switch (xbb->abi) { 2980 case BLKIF_PROTOCOL_NATIVE: 2981 { 2982 blkif_sring_t *sring; 2983 sring = (blkif_sring_t *)xbb->ring_config.va; 2984 BACK_RING_INIT(&xbb->rings.native, sring, 2985 xbb->ring_config.ring_pages * PAGE_SIZE); 2986 break; 2987 } 2988 case BLKIF_PROTOCOL_X86_32: 2989 { 2990 blkif_x86_32_sring_t *sring_x86_32; 2991 sring_x86_32 = (blkif_x86_32_sring_t *)xbb->ring_config.va; 2992 BACK_RING_INIT(&xbb->rings.x86_32, sring_x86_32, 2993 xbb->ring_config.ring_pages * PAGE_SIZE); 2994 break; 2995 } 2996 case BLKIF_PROTOCOL_X86_64: 2997 { 2998 blkif_x86_64_sring_t *sring_x86_64; 2999 sring_x86_64 = (blkif_x86_64_sring_t *)xbb->ring_config.va; 3000 BACK_RING_INIT(&xbb->rings.x86_64, sring_x86_64, 3001 xbb->ring_config.ring_pages * PAGE_SIZE); 3002 break; 3003 } 3004 default: 3005 panic("Unexpected blkif protocol ABI."); 3006 } 3007 3008 xbb->flags |= XBBF_RING_CONNECTED; 3009 3010 error = xen_intr_bind_remote_port(xbb->dev, 3011 xbb->otherend_id, 3012 xbb->ring_config.evtchn, 3013 xbb_filter, 3014 /*ithread_handler*/NULL, 3015 /*arg*/xbb, 3016 INTR_TYPE_BIO | INTR_MPSAFE, 3017 &xbb->xen_intr_handle); 3018 if (error) { 3019 (void)xbb_disconnect(xbb); 3020 xenbus_dev_fatal(xbb->dev, error, "binding event channel"); 3021 return (error); 3022 } 3023 3024 DPRINTF("rings connected!\n"); 3025 3026 return 0; 3027 } 3028 3029 /* Needed to make bit_alloc() macro work */ 3030 #define calloc(count, size) malloc((count)*(size), M_XENBLOCKBACK, \ 3031 M_NOWAIT|M_ZERO); 3032 3033 /** 3034 * Size KVA and pseudo-physical address allocations based on negotiated 3035 * values for the size and number of I/O requests, and the size of our 3036 * communication ring. 3037 * 3038 * \param xbb Per-instance xbb configuration structure. 3039 * 3040 * These address spaces are used to dynamically map pages in the 3041 * front-end's domain into our own. 3042 */ 3043 static int 3044 xbb_alloc_communication_mem(struct xbb_softc *xbb) 3045 { 3046 xbb->reqlist_kva_pages = xbb->max_requests * xbb->max_request_segments; 3047 xbb->reqlist_kva_size = xbb->reqlist_kva_pages * PAGE_SIZE; 3048 xbb->kva_size = xbb->reqlist_kva_size + 3049 (xbb->ring_config.ring_pages * PAGE_SIZE); 3050 3051 xbb->kva_free = bit_alloc(xbb->reqlist_kva_pages); 3052 if (xbb->kva_free == NULL) 3053 return (ENOMEM); 3054 3055 DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n", 3056 device_get_nameunit(xbb->dev), xbb->kva_size, 3057 xbb->reqlist_kva_size); 3058 #ifndef XENHVM 3059 xbb->kva = kva_alloc(xbb->kva_size); 3060 if (xbb->kva == 0) 3061 return (ENOMEM); 3062 xbb->gnt_base_addr = xbb->kva; 3063 #else /* XENHVM */ 3064 /* 3065 * Reserve a range of pseudo physical memory that we can map 3066 * into kva. These pages will only be backed by machine 3067 * pages ("real memory") during the lifetime of front-end requests 3068 * via grant table operations. 3069 */ 3070 xbb->pseudo_phys_res_id = 0; 3071 xbb->pseudo_phys_res = bus_alloc_resource(xbb->dev, SYS_RES_MEMORY, 3072 &xbb->pseudo_phys_res_id, 3073 0, ~0, xbb->kva_size, 3074 RF_ACTIVE); 3075 if (xbb->pseudo_phys_res == NULL) { 3076 xbb->kva = 0; 3077 return (ENOMEM); 3078 } 3079 xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); 3080 xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); 3081 #endif /* XENHVM */ 3082 3083 DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n", 3084 device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva, 3085 (uintmax_t)xbb->gnt_base_addr); 3086 return (0); 3087 } 3088 3089 /** 3090 * Collect front-end information from the XenStore. 
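 *
 * Reads the front-end's "event-channel", "ring-page-order" or
 * "num-ring-pages", "max-requests", "max-request-segments",
 * "max-request-size", "ring-ref"/"ring-ref%u", and "protocol" nodes,
 * validating each negotiated value against this backend's limits.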
3091 * 3092 * \param xbb Per-instance xbb configuration structure. 3093 */ 3094 static int 3095 xbb_collect_frontend_info(struct xbb_softc *xbb) 3096 { 3097 char protocol_abi[64]; 3098 const char *otherend_path; 3099 int error; 3100 u_int ring_idx; 3101 u_int ring_page_order; 3102 size_t ring_size; 3103 3104 otherend_path = xenbus_get_otherend_path(xbb->dev); 3105 3106 /* 3107 * Protocol defaults valid even if all negotiation fails. 3108 */ 3109 xbb->ring_config.ring_pages = 1; 3110 xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; 3111 xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; 3112 3113 /* 3114 * Mandatory data (used in all versions of the protocol) first. 3115 */ 3116 error = xs_scanf(XST_NIL, otherend_path, 3117 "event-channel", NULL, "%" PRIu32, 3118 &xbb->ring_config.evtchn); 3119 if (error != 0) { 3120 xenbus_dev_fatal(xbb->dev, error, 3121 "Unable to retrieve event-channel information " 3122 "from frontend %s. Unable to connect.", 3123 xenbus_get_otherend_path(xbb->dev)); 3124 return (error); 3125 } 3126 3127 /* 3128 * These fields are initialized to legacy protocol defaults 3129 * so we only need to fail if reading the updated value succeeds 3130 * and the new value is outside of its allowed range. 3131 * 3132 * \note xs_gather() returns on the first encountered error, so 3133 * we must use independant calls in order to guarantee 3134 * we don't miss information in a sparsly populated front-end 3135 * tree. 3136 * 3137 * \note xs_scanf() does not update variables for unmatched 3138 * fields. 3139 */ 3140 ring_page_order = 0; 3141 (void)xs_scanf(XST_NIL, otherend_path, 3142 "ring-page-order", NULL, "%u", 3143 &ring_page_order); 3144 xbb->ring_config.ring_pages = 1 << ring_page_order; 3145 (void)xs_scanf(XST_NIL, otherend_path, 3146 "num-ring-pages", NULL, "%u", 3147 &xbb->ring_config.ring_pages); 3148 ring_size = PAGE_SIZE * xbb->ring_config.ring_pages; 3149 xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size); 3150 3151 (void)xs_scanf(XST_NIL, otherend_path, 3152 "max-requests", NULL, "%u", 3153 &xbb->max_requests); 3154 3155 (void)xs_scanf(XST_NIL, otherend_path, 3156 "max-request-segments", NULL, "%u", 3157 &xbb->max_request_segments); 3158 3159 (void)xs_scanf(XST_NIL, otherend_path, 3160 "max-request-size", NULL, "%u", 3161 &xbb->max_request_size); 3162 3163 if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) { 3164 xenbus_dev_fatal(xbb->dev, EINVAL, 3165 "Front-end specified ring-pages of %u " 3166 "exceeds backend limit of %zu. " 3167 "Unable to connect.", 3168 xbb->ring_config.ring_pages, 3169 XBB_MAX_RING_PAGES); 3170 return (EINVAL); 3171 } else if (xbb->max_requests > XBB_MAX_REQUESTS) { 3172 xenbus_dev_fatal(xbb->dev, EINVAL, 3173 "Front-end specified max_requests of %u " 3174 "exceeds backend limit of %u. " 3175 "Unable to connect.", 3176 xbb->max_requests, 3177 XBB_MAX_REQUESTS); 3178 return (EINVAL); 3179 } else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) { 3180 xenbus_dev_fatal(xbb->dev, EINVAL, 3181 "Front-end specified max_requests_segments " 3182 "of %u exceeds backend limit of %u. " 3183 "Unable to connect.", 3184 xbb->max_request_segments, 3185 XBB_MAX_SEGMENTS_PER_REQUEST); 3186 return (EINVAL); 3187 } else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) { 3188 xenbus_dev_fatal(xbb->dev, EINVAL, 3189 "Front-end specified max_request_size " 3190 "of %u exceeds backend limit of %u. 
" 3191 "Unable to connect.", 3192 xbb->max_request_size, 3193 XBB_MAX_REQUEST_SIZE); 3194 return (EINVAL); 3195 } 3196 3197 if (xbb->ring_config.ring_pages == 1) { 3198 error = xs_gather(XST_NIL, otherend_path, 3199 "ring-ref", "%" PRIu32, 3200 &xbb->ring_config.ring_ref[0], 3201 NULL); 3202 if (error != 0) { 3203 xenbus_dev_fatal(xbb->dev, error, 3204 "Unable to retrieve ring information " 3205 "from frontend %s. Unable to " 3206 "connect.", 3207 xenbus_get_otherend_path(xbb->dev)); 3208 return (error); 3209 } 3210 } else { 3211 /* Multi-page ring format. */ 3212 for (ring_idx = 0; ring_idx < xbb->ring_config.ring_pages; 3213 ring_idx++) { 3214 char ring_ref_name[]= "ring_refXX"; 3215 3216 snprintf(ring_ref_name, sizeof(ring_ref_name), 3217 "ring-ref%u", ring_idx); 3218 error = xs_scanf(XST_NIL, otherend_path, 3219 ring_ref_name, NULL, "%" PRIu32, 3220 &xbb->ring_config.ring_ref[ring_idx]); 3221 if (error != 0) { 3222 xenbus_dev_fatal(xbb->dev, error, 3223 "Failed to retriev grant " 3224 "reference for page %u of " 3225 "shared ring. Unable " 3226 "to connect.", ring_idx); 3227 return (error); 3228 } 3229 } 3230 } 3231 3232 error = xs_gather(XST_NIL, otherend_path, 3233 "protocol", "%63s", protocol_abi, 3234 NULL); 3235 if (error != 0 3236 || !strcmp(protocol_abi, XEN_IO_PROTO_ABI_NATIVE)) { 3237 /* 3238 * Assume native if the frontend has not 3239 * published ABI data or it has published and 3240 * matches our own ABI. 3241 */ 3242 xbb->abi = BLKIF_PROTOCOL_NATIVE; 3243 } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_32)) { 3244 3245 xbb->abi = BLKIF_PROTOCOL_X86_32; 3246 } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_64)) { 3247 3248 xbb->abi = BLKIF_PROTOCOL_X86_64; 3249 } else { 3250 3251 xenbus_dev_fatal(xbb->dev, EINVAL, 3252 "Unknown protocol ABI (%s) published by " 3253 "frontend. Unable to connect.", protocol_abi); 3254 return (EINVAL); 3255 } 3256 return (0); 3257 } 3258 3259 /** 3260 * Allocate per-request data structures given request size and number 3261 * information negotiated with the front-end. 3262 * 3263 * \param xbb Per-instance xbb configuration structure. 3264 */ 3265 static int 3266 xbb_alloc_requests(struct xbb_softc *xbb) 3267 { 3268 struct xbb_xen_req *req; 3269 struct xbb_xen_req *last_req; 3270 3271 /* 3272 * Allocate request book keeping datastructures. 3273 */ 3274 xbb->requests = malloc(xbb->max_requests * sizeof(*xbb->requests), 3275 M_XENBLOCKBACK, M_NOWAIT|M_ZERO); 3276 if (xbb->requests == NULL) { 3277 xenbus_dev_fatal(xbb->dev, ENOMEM, 3278 "Unable to allocate request structures"); 3279 return (ENOMEM); 3280 } 3281 3282 req = xbb->requests; 3283 last_req = &xbb->requests[xbb->max_requests - 1]; 3284 STAILQ_INIT(&xbb->request_free_stailq); 3285 while (req <= last_req) { 3286 STAILQ_INSERT_TAIL(&xbb->request_free_stailq, req, links); 3287 req++; 3288 } 3289 return (0); 3290 } 3291 3292 static int 3293 xbb_alloc_request_lists(struct xbb_softc *xbb) 3294 { 3295 struct xbb_xen_reqlist *reqlist; 3296 int i; 3297 3298 /* 3299 * If no requests can be merged, we need 1 request list per 3300 * in flight request. 
3301 */ 3302 xbb->request_lists = malloc(xbb->max_requests * 3303 sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); 3304 if (xbb->request_lists == NULL) { 3305 xenbus_dev_fatal(xbb->dev, ENOMEM, 3306 "Unable to allocate request list structures"); 3307 return (ENOMEM); 3308 } 3309 3310 STAILQ_INIT(&xbb->reqlist_free_stailq); 3311 STAILQ_INIT(&xbb->reqlist_pending_stailq); 3312 for (i = 0; i < xbb->max_requests; i++) { 3313 int seg; 3314 3315 reqlist = &xbb->request_lists[i]; 3316 3317 reqlist->xbb = xbb; 3318 3319 #ifdef XBB_USE_BOUNCE_BUFFERS 3320 reqlist->bounce = malloc(xbb->max_reqlist_size, 3321 M_XENBLOCKBACK, M_NOWAIT); 3322 if (reqlist->bounce == NULL) { 3323 xenbus_dev_fatal(xbb->dev, ENOMEM, 3324 "Unable to allocate request " 3325 "bounce buffers"); 3326 return (ENOMEM); 3327 } 3328 #endif /* XBB_USE_BOUNCE_BUFFERS */ 3329 3330 reqlist->gnt_handles = malloc(xbb->max_reqlist_segments * 3331 sizeof(*reqlist->gnt_handles), 3332 M_XENBLOCKBACK, M_NOWAIT|M_ZERO); 3333 if (reqlist->gnt_handles == NULL) { 3334 xenbus_dev_fatal(xbb->dev, ENOMEM, 3335 "Unable to allocate request " 3336 "grant references"); 3337 return (ENOMEM); 3338 } 3339 3340 for (seg = 0; seg < xbb->max_reqlist_segments; seg++) 3341 reqlist->gnt_handles[seg] = GRANT_REF_INVALID; 3342 3343 STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); 3344 } 3345 return (0); 3346 } 3347 3348 /** 3349 * Supply information about the physical device to the frontend 3350 * via XenBus. 3351 * 3352 * \param xbb Per-instance xbb configuration structure. 3353 */ 3354 static int 3355 xbb_publish_backend_info(struct xbb_softc *xbb) 3356 { 3357 struct xs_transaction xst; 3358 const char *our_path; 3359 const char *leaf; 3360 int error; 3361 3362 our_path = xenbus_get_node(xbb->dev); 3363 while (1) { 3364 error = xs_transaction_start(&xst); 3365 if (error != 0) { 3366 xenbus_dev_fatal(xbb->dev, error, 3367 "Error publishing backend info " 3368 "(start transaction)"); 3369 return (error); 3370 } 3371 3372 leaf = "sectors"; 3373 error = xs_printf(xst, our_path, leaf, 3374 "%"PRIu64, xbb->media_num_sectors); 3375 if (error != 0) 3376 break; 3377 3378 /* XXX Support all VBD attributes here. */ 3379 leaf = "info"; 3380 error = xs_printf(xst, our_path, leaf, "%u", 3381 xbb->flags & XBBF_READ_ONLY 3382 ? VDISK_READONLY : 0); 3383 if (error != 0) 3384 break; 3385 3386 leaf = "sector-size"; 3387 error = xs_printf(xst, our_path, leaf, "%u", 3388 xbb->sector_size); 3389 if (error != 0) 3390 break; 3391 3392 error = xs_transaction_end(xst, 0); 3393 if (error == 0) { 3394 return (0); 3395 } else if (error != EAGAIN) { 3396 xenbus_dev_fatal(xbb->dev, error, "ending transaction"); 3397 return (error); 3398 } 3399 } 3400 3401 xenbus_dev_fatal(xbb->dev, error, "writing %s/%s", 3402 our_path, leaf); 3403 xs_transaction_end(xst, 1); 3404 return (error); 3405 } 3406 3407 /** 3408 * Connect to our blkfront peer now that it has completed publishing 3409 * its configuration into the XenStore. 3410 * 3411 * \param xbb Per-instance xbb configuration structure. 3412 */ 3413 static void 3414 xbb_connect(struct xbb_softc *xbb) 3415 { 3416 int error; 3417 3418 if (xenbus_get_state(xbb->dev) == XenbusStateConnected) 3419 return; 3420 3421 if (xbb_collect_frontend_info(xbb) != 0) 3422 return; 3423 3424 xbb->flags &= ~XBBF_SHUTDOWN; 3425 3426 /* 3427 * We limit the maximum number of reqlist segments to the maximum 3428 * number of segments in the ring, or our absolute maximum, 3429 * whichever is smaller. 
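 * In other words, max_reqlist_segments below is
 * min(max_request_segments * max_requests, XBB_MAX_SEGMENTS_PER_REQLIST),
 * and max_reqlist_size is simply that segment count times PAGE_SIZE.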
3430 */ 3431 xbb->max_reqlist_segments = MIN(xbb->max_request_segments * 3432 xbb->max_requests, XBB_MAX_SEGMENTS_PER_REQLIST); 3433 3434 /* 3435 * The maximum size is simply a function of the number of segments 3436 * we can handle. 3437 */ 3438 xbb->max_reqlist_size = xbb->max_reqlist_segments * PAGE_SIZE; 3439 3440 /* Allocate resources whose size depends on front-end configuration. */ 3441 error = xbb_alloc_communication_mem(xbb); 3442 if (error != 0) { 3443 xenbus_dev_fatal(xbb->dev, error, 3444 "Unable to allocate communication memory"); 3445 return; 3446 } 3447 3448 error = xbb_alloc_requests(xbb); 3449 if (error != 0) { 3450 /* Specific errors are reported by xbb_alloc_requests(). */ 3451 return; 3452 } 3453 3454 error = xbb_alloc_request_lists(xbb); 3455 if (error != 0) { 3456 /* Specific errors are reported by xbb_alloc_request_lists(). */ 3457 return; 3458 } 3459 3460 /* 3461 * Connect communication channel. 3462 */ 3463 error = xbb_connect_ring(xbb); 3464 if (error != 0) { 3465 /* Specific errors are reported by xbb_connect_ring(). */ 3466 return; 3467 } 3468 3469 if (xbb_publish_backend_info(xbb) != 0) { 3470 /* 3471 * If we can't publish our data, we cannot participate 3472 * in this connection, and waiting for a front-end state 3473 * change will not help the situation. 3474 */ 3475 (void)xbb_disconnect(xbb); 3476 return; 3477 } 3478 3479 /* Ready for I/O. */ 3480 xenbus_set_state(xbb->dev, XenbusStateConnected); 3481 } 3482 3483 /*-------------------------- Device Teardown Support -------------------------*/ 3484 /** 3485 * Perform device shutdown functions. 3486 * 3487 * \param xbb Per-instance xbb configuration structure. 3488 * 3489 * Mark this instance as shutting down, wait for any active I/O on the 3490 * backend device/file to drain, disconnect from the front-end, and notify 3491 * any waiters (e.g. a thread invoking our detach method) that detach can 3492 * now proceed. 3493 */ 3494 static int 3495 xbb_shutdown(struct xbb_softc *xbb) 3496 { 3497 XenbusState frontState; 3498 int error; 3499 3500 DPRINTF("\n"); 3501 3502 /* 3503 * Due to the need to drop our mutex during some 3504 * xenbus operations, it is possible for two threads 3505 * to attempt to close out shutdown processing at 3506 * the same time. Tell the caller that hits this 3507 * race to try back later. 3508 */ 3509 if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0) 3510 return (EAGAIN); 3511 3512 xbb->flags |= XBBF_IN_SHUTDOWN; 3513 mtx_unlock(&xbb->lock); 3514 3515 if (xenbus_get_state(xbb->dev) < XenbusStateClosing) 3516 xenbus_set_state(xbb->dev, XenbusStateClosing); 3517 3518 frontState = xenbus_get_otherend_state(xbb->dev); 3519 mtx_lock(&xbb->lock); 3520 xbb->flags &= ~XBBF_IN_SHUTDOWN; 3521 3522 /* The front can submit I/O until entering the closed state. */ 3523 if (frontState < XenbusStateClosed) 3524 return (EAGAIN); 3525 3526 DPRINTF("\n"); 3527 3528 /* Indicate shutdown is in progress. */ 3529 xbb->flags |= XBBF_SHUTDOWN; 3530 3531 /* Disconnect from the front-end. */ 3532 error = xbb_disconnect(xbb); 3533 if (error != 0) { 3534 /* 3535 * Requests still outstanding. We'll be called again 3536 * once they complete. 3537 */ 3538 KASSERT(error == EAGAIN, 3539 ("%s: Unexpected xbb_disconnect() failure %d", 3540 __func__, error)); 3541 3542 return (error); 3543 } 3544 3545 DPRINTF("\n"); 3546 3547 /* Indicate to xbb_detach() that is it safe to proceed. 
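 * xbb_detach() msleep()s on the softc address while xbb_shutdown()
 * keeps returning EAGAIN; this wakeup() releases it once teardown is
 * complete.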
*/ 3548 wakeup(xbb); 3549 3550 return (0); 3551 } 3552 3553 /** 3554 * Report an attach time error to the console and Xen, and cleanup 3555 * this instance by forcing immediate detach processing. 3556 * 3557 * \param xbb Per-instance xbb configuration structure. 3558 * \param err Errno describing the error. 3559 * \param fmt Printf style format and arguments 3560 */ 3561 static void 3562 xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) 3563 { 3564 va_list ap; 3565 va_list ap_hotplug; 3566 3567 va_start(ap, fmt); 3568 va_copy(ap_hotplug, ap); 3569 xs_vprintf(XST_NIL, xenbus_get_node(xbb->dev), 3570 "hotplug-error", fmt, ap_hotplug); 3571 va_end(ap_hotplug); 3572 xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3573 "hotplug-status", "error"); 3574 3575 xenbus_dev_vfatal(xbb->dev, err, fmt, ap); 3576 va_end(ap); 3577 3578 xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3579 "online", "0"); 3580 xbb_detach(xbb->dev); 3581 } 3582 3583 /*---------------------------- NewBus Entrypoints ----------------------------*/ 3584 /** 3585 * Inspect a XenBus device and claim it if is of the appropriate type. 3586 * 3587 * \param dev NewBus device object representing a candidate XenBus device. 3588 * 3589 * \return 0 for success, errno codes for failure. 3590 */ 3591 static int 3592 xbb_probe(device_t dev) 3593 { 3594 3595 if (!strcmp(xenbus_get_type(dev), "vbd")) { 3596 device_set_desc(dev, "Backend Virtual Block Device"); 3597 device_quiet(dev); 3598 return (0); 3599 } 3600 3601 return (ENXIO); 3602 } 3603 3604 /** 3605 * Setup sysctl variables to control various Block Back parameters. 3606 * 3607 * \param xbb Xen Block Back softc. 3608 * 3609 */ 3610 static void 3611 xbb_setup_sysctl(struct xbb_softc *xbb) 3612 { 3613 struct sysctl_ctx_list *sysctl_ctx = NULL; 3614 struct sysctl_oid *sysctl_tree = NULL; 3615 3616 sysctl_ctx = device_get_sysctl_ctx(xbb->dev); 3617 if (sysctl_ctx == NULL) 3618 return; 3619 3620 sysctl_tree = device_get_sysctl_tree(xbb->dev); 3621 if (sysctl_tree == NULL) 3622 return; 3623 3624 SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3625 "disable_flush", CTLFLAG_RW, &xbb->disable_flush, 0, 3626 "fake the flush command"); 3627 3628 SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3629 "flush_interval", CTLFLAG_RW, &xbb->flush_interval, 0, 3630 "send a real flush for N flush requests"); 3631 3632 SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3633 "no_coalesce_reqs", CTLFLAG_RW, &xbb->no_coalesce_reqs,0, 3634 "Don't coalesce contiguous requests"); 3635 3636 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3637 "reqs_received", CTLFLAG_RW, &xbb->reqs_received, 3638 "how many I/O requests we have received"); 3639 3640 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3641 "reqs_completed", CTLFLAG_RW, &xbb->reqs_completed, 3642 "how many I/O requests have been completed"); 3643 3644 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3645 "reqs_queued_for_completion", CTLFLAG_RW, 3646 &xbb->reqs_queued_for_completion, 3647 "how many I/O requests queued but not yet pushed"); 3648 3649 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3650 "reqs_completed_with_error", CTLFLAG_RW, 3651 &xbb->reqs_completed_with_error, 3652 "how many I/O requests completed with error status"); 3653 3654 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3655 "forced_dispatch", CTLFLAG_RW, &xbb->forced_dispatch, 3656 "how many I/O dispatches 
were forced"); 3657 3658 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3659 "normal_dispatch", CTLFLAG_RW, &xbb->normal_dispatch, 3660 "how many I/O dispatches were normal"); 3661 3662 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3663 "total_dispatch", CTLFLAG_RW, &xbb->total_dispatch, 3664 "total number of I/O dispatches"); 3665 3666 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3667 "kva_shortages", CTLFLAG_RW, &xbb->kva_shortages, 3668 "how many times we have run out of KVA"); 3669 3670 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3671 "request_shortages", CTLFLAG_RW, 3672 &xbb->request_shortages, 3673 "how many times we have run out of requests"); 3674 3675 SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3676 "max_requests", CTLFLAG_RD, &xbb->max_requests, 0, 3677 "maximum outstanding requests (negotiated)"); 3678 3679 SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3680 "max_request_segments", CTLFLAG_RD, 3681 &xbb->max_request_segments, 0, 3682 "maximum number of pages per requests (negotiated)"); 3683 3684 SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3685 "max_request_size", CTLFLAG_RD, 3686 &xbb->max_request_size, 0, 3687 "maximum size in bytes of a request (negotiated)"); 3688 3689 SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 3690 "ring_pages", CTLFLAG_RD, 3691 &xbb->ring_config.ring_pages, 0, 3692 "communication channel pages (negotiated)"); 3693 } 3694 3695 /** 3696 * Attach to a XenBus device that has been claimed by our probe routine. 3697 * 3698 * \param dev NewBus device object representing this Xen Block Back instance. 3699 * 3700 * \return 0 for success, errno codes for failure. 3701 */ 3702 static int 3703 xbb_attach(device_t dev) 3704 { 3705 struct xbb_softc *xbb; 3706 int error; 3707 u_int max_ring_page_order; 3708 3709 DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); 3710 3711 /* 3712 * Basic initialization. 3713 * After this block it is safe to call xbb_detach() 3714 * to clean up any allocated data for this instance. 3715 */ 3716 xbb = device_get_softc(dev); 3717 xbb->dev = dev; 3718 xbb->otherend_id = xenbus_get_otherend_id(dev); 3719 TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); 3720 mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); 3721 3722 /* 3723 * Publish protocol capabilities for consumption by the 3724 * front-end. 3725 */ 3726 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3727 "feature-barrier", "1"); 3728 if (error) { 3729 xbb_attach_failed(xbb, error, "writing %s/feature-barrier", 3730 xenbus_get_node(xbb->dev)); 3731 return (error); 3732 } 3733 3734 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3735 "feature-flush-cache", "1"); 3736 if (error) { 3737 xbb_attach_failed(xbb, error, "writing %s/feature-flush-cache", 3738 xenbus_get_node(xbb->dev)); 3739 return (error); 3740 } 3741 3742 /* 3743 * Amazon EC2 client compatility. They refer to max-ring-pages 3744 * instead of to max-ring-page-order. 
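 * Both nodes are published with consistent values derived from
 * XBB_MAX_RING_PAGES; max-ring-page-order is computed below as
 * flsl(XBB_MAX_RING_PAGES) - 1.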
3745 */ 3746 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3747 "max-ring-pages", "%zu", XBB_MAX_RING_PAGES); 3748 if (error) { 3749 xbb_attach_failed(xbb, error, "writing %s/max-ring-pages", 3750 xenbus_get_node(xbb->dev)); 3751 return (error); 3752 } 3753 3754 max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1; 3755 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3756 "max-ring-page-order", "%u", max_ring_page_order); 3757 if (error) { 3758 xbb_attach_failed(xbb, error, "writing %s/max-ring-page-order", 3759 xenbus_get_node(xbb->dev)); 3760 return (error); 3761 } 3762 3763 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3764 "max-requests", "%u", XBB_MAX_REQUESTS); 3765 if (error) { 3766 xbb_attach_failed(xbb, error, "writing %s/max-requests", 3767 xenbus_get_node(xbb->dev)); 3768 return (error); 3769 } 3770 3771 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3772 "max-request-segments", "%u", 3773 XBB_MAX_SEGMENTS_PER_REQUEST); 3774 if (error) { 3775 xbb_attach_failed(xbb, error, "writing %s/max-request-segments", 3776 xenbus_get_node(xbb->dev)); 3777 return (error); 3778 } 3779 3780 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3781 "max-request-size", "%u", 3782 XBB_MAX_REQUEST_SIZE); 3783 if (error) { 3784 xbb_attach_failed(xbb, error, "writing %s/max-request-size", 3785 xenbus_get_node(xbb->dev)); 3786 return (error); 3787 } 3788 3789 /* Collect physical device information. */ 3790 error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), 3791 "device-type", NULL, &xbb->dev_type, 3792 NULL); 3793 if (error != 0) 3794 xbb->dev_type = NULL; 3795 3796 error = xs_gather(XST_NIL, xenbus_get_node(dev), 3797 "mode", NULL, &xbb->dev_mode, 3798 "params", NULL, &xbb->dev_name, 3799 NULL); 3800 if (error != 0) { 3801 xbb_attach_failed(xbb, error, "reading backend fields at %s", 3802 xenbus_get_node(dev)); 3803 return (ENXIO); 3804 } 3805 3806 /* Parse fopen style mode flags. */ 3807 if (strchr(xbb->dev_mode, 'w') == NULL) 3808 xbb->flags |= XBBF_READ_ONLY; 3809 3810 /* 3811 * Verify the physical device is present and can support 3812 * the desired I/O mode. 3813 */ 3814 DROP_GIANT(); 3815 error = xbb_open_backend(xbb); 3816 PICKUP_GIANT(); 3817 if (error != 0) { 3818 xbb_attach_failed(xbb, error, "Unable to open %s", 3819 xbb->dev_name); 3820 return (ENXIO); 3821 } 3822 3823 /* Use devstat(9) for recording statistics. */ 3824 xbb->xbb_stats = devstat_new_entry("xbb", device_get_unit(xbb->dev), 3825 xbb->sector_size, 3826 DEVSTAT_ALL_SUPPORTED, 3827 DEVSTAT_TYPE_DIRECT 3828 | DEVSTAT_TYPE_IF_OTHER, 3829 DEVSTAT_PRIORITY_OTHER); 3830 3831 xbb->xbb_stats_in = devstat_new_entry("xbbi", device_get_unit(xbb->dev), 3832 xbb->sector_size, 3833 DEVSTAT_ALL_SUPPORTED, 3834 DEVSTAT_TYPE_DIRECT 3835 | DEVSTAT_TYPE_IF_OTHER, 3836 DEVSTAT_PRIORITY_OTHER); 3837 /* 3838 * Setup sysctl variables. 3839 */ 3840 xbb_setup_sysctl(xbb); 3841 3842 /* 3843 * Create a taskqueue for doing work that must occur from a 3844 * thread context. 3845 */ 3846 xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev), 3847 M_NOWAIT, 3848 taskqueue_thread_enqueue, 3849 /*contxt*/&xbb->io_taskqueue); 3850 if (xbb->io_taskqueue == NULL) { 3851 xbb_attach_failed(xbb, error, "Unable to create taskqueue"); 3852 return (ENOMEM); 3853 } 3854 3855 taskqueue_start_threads(&xbb->io_taskqueue, 3856 /*num threads*/1, 3857 /*priority*/PWAIT, 3858 /*thread name*/ 3859 "%s taskq", device_get_nameunit(dev)); 3860 3861 /* Update hot-plug status to satisfy xend. 
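 * (On attach failure, xbb_attach_failed() instead writes "error" to
 * this same node.)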
*/ 3862 error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), 3863 "hotplug-status", "connected"); 3864 if (error) { 3865 xbb_attach_failed(xbb, error, "writing %s/hotplug-status", 3866 xenbus_get_node(xbb->dev)); 3867 return (error); 3868 } 3869 3870 /* Tell the front end that we are ready to connect. */ 3871 xenbus_set_state(dev, XenbusStateInitWait); 3872 3873 return (0); 3874 } 3875 3876 /** 3877 * Detach from a block back device instance. 3878 * 3879 * \param dev NewBus device object representing this Xen Block Back instance. 3880 * 3881 * \return 0 for success, errno codes for failure. 3882 * 3883 * \note A block back device may be detached at any time in its life-cycle, 3884 * including part way through the attach process. For this reason, 3885 * initialization order and the intialization state checks in this 3886 * routine must be carefully coupled so that attach time failures 3887 * are gracefully handled. 3888 */ 3889 static int 3890 xbb_detach(device_t dev) 3891 { 3892 struct xbb_softc *xbb; 3893 3894 DPRINTF("\n"); 3895 3896 xbb = device_get_softc(dev); 3897 mtx_lock(&xbb->lock); 3898 while (xbb_shutdown(xbb) == EAGAIN) { 3899 msleep(xbb, &xbb->lock, /*wakeup prio unchanged*/0, 3900 "xbb_shutdown", 0); 3901 } 3902 mtx_unlock(&xbb->lock); 3903 3904 DPRINTF("\n"); 3905 3906 if (xbb->io_taskqueue != NULL) 3907 taskqueue_free(xbb->io_taskqueue); 3908 3909 if (xbb->xbb_stats != NULL) 3910 devstat_remove_entry(xbb->xbb_stats); 3911 3912 if (xbb->xbb_stats_in != NULL) 3913 devstat_remove_entry(xbb->xbb_stats_in); 3914 3915 xbb_close_backend(xbb); 3916 3917 if (xbb->dev_mode != NULL) { 3918 free(xbb->dev_mode, M_XENSTORE); 3919 xbb->dev_mode = NULL; 3920 } 3921 3922 if (xbb->dev_type != NULL) { 3923 free(xbb->dev_type, M_XENSTORE); 3924 xbb->dev_type = NULL; 3925 } 3926 3927 if (xbb->dev_name != NULL) { 3928 free(xbb->dev_name, M_XENSTORE); 3929 xbb->dev_name = NULL; 3930 } 3931 3932 mtx_destroy(&xbb->lock); 3933 return (0); 3934 } 3935 3936 /** 3937 * Prepare this block back device for suspension of this VM. 3938 * 3939 * \param dev NewBus device object representing this Xen Block Back instance. 3940 * 3941 * \return 0 for success, errno codes for failure. 3942 */ 3943 static int 3944 xbb_suspend(device_t dev) 3945 { 3946 #ifdef NOT_YET 3947 struct xbb_softc *sc = device_get_softc(dev); 3948 3949 /* Prevent new requests being issued until we fix things up. */ 3950 mtx_lock(&sc->xb_io_lock); 3951 sc->connected = BLKIF_STATE_SUSPENDED; 3952 mtx_unlock(&sc->xb_io_lock); 3953 #endif 3954 3955 return (0); 3956 } 3957 3958 /** 3959 * Perform any processing required to recover from a suspended state. 3960 * 3961 * \param dev NewBus device object representing this Xen Block Back instance. 3962 * 3963 * \return 0 for success, errno codes for failure. 3964 */ 3965 static int 3966 xbb_resume(device_t dev) 3967 { 3968 return (0); 3969 } 3970 3971 /** 3972 * Handle state changes expressed via the XenStore by our front-end peer. 3973 * 3974 * \param dev NewBus device object representing this Xen 3975 * Block Back instance. 3976 * \param frontend_state The new state of the front-end. 3977 * 3978 * \return 0 for success, errno codes for failure. 
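 *
 * \note Failures detected while handling a state change are reported
 *       via xenbus_dev_fatal() rather than through a return value.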
3979 */ 3980 static void 3981 xbb_frontend_changed(device_t dev, XenbusState frontend_state) 3982 { 3983 struct xbb_softc *xbb = device_get_softc(dev); 3984 3985 DPRINTF("frontend_state=%s, xbb_state=%s\n", 3986 xenbus_strstate(frontend_state), 3987 xenbus_strstate(xenbus_get_state(xbb->dev))); 3988 3989 switch (frontend_state) { 3990 case XenbusStateInitialising: 3991 break; 3992 case XenbusStateInitialised: 3993 case XenbusStateConnected: 3994 xbb_connect(xbb); 3995 break; 3996 case XenbusStateClosing: 3997 case XenbusStateClosed: 3998 mtx_lock(&xbb->lock); 3999 xbb_shutdown(xbb); 4000 mtx_unlock(&xbb->lock); 4001 if (frontend_state == XenbusStateClosed) 4002 xenbus_set_state(xbb->dev, XenbusStateClosed); 4003 break; 4004 default: 4005 xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend", 4006 frontend_state); 4007 break; 4008 } 4009 } 4010 4011 /*---------------------------- NewBus Registration ---------------------------*/ 4012 static device_method_t xbb_methods[] = { 4013 /* Device interface */ 4014 DEVMETHOD(device_probe, xbb_probe), 4015 DEVMETHOD(device_attach, xbb_attach), 4016 DEVMETHOD(device_detach, xbb_detach), 4017 DEVMETHOD(device_shutdown, bus_generic_shutdown), 4018 DEVMETHOD(device_suspend, xbb_suspend), 4019 DEVMETHOD(device_resume, xbb_resume), 4020 4021 /* Xenbus interface */ 4022 DEVMETHOD(xenbus_otherend_changed, xbb_frontend_changed), 4023 4024 { 0, 0 } 4025 }; 4026 4027 static driver_t xbb_driver = { 4028 "xbbd", 4029 xbb_methods, 4030 sizeof(struct xbb_softc), 4031 }; 4032 devclass_t xbb_devclass; 4033 4034 DRIVER_MODULE(xbbd, xenbusb_back, xbb_driver, xbb_devclass, 0, 0); 4035