1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/kmsan.h> 15 #include <linux/spinlock.h> 16 #include <xen/xen.h> 17 18 #ifdef DEBUG 19 /* For development, we want to crash whenever the ring is screwed. */ 20 #define BAD_RING(_vq, fmt, args...) \ 21 do { \ 22 dev_err(&(_vq)->vq.vdev->dev, \ 23 "%s:"fmt, (_vq)->vq.name, ##args); \ 24 BUG(); \ 25 } while (0) 26 /* Caller is supposed to guarantee no reentry. */ 27 #define START_USE(_vq) \ 28 do { \ 29 if ((_vq)->in_use) \ 30 panic("%s:in_use = %i\n", \ 31 (_vq)->vq.name, (_vq)->in_use); \ 32 (_vq)->in_use = __LINE__; \ 33 } while (0) 34 #define END_USE(_vq) \ 35 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 36 #define LAST_ADD_TIME_UPDATE(_vq) \ 37 do { \ 38 ktime_t now = ktime_get(); \ 39 \ 40 /* No kick or get, with .1 second between? Warn. */ \ 41 if ((_vq)->last_add_time_valid) \ 42 WARN_ON(ktime_to_ms(ktime_sub(now, \ 43 (_vq)->last_add_time)) > 100); \ 44 (_vq)->last_add_time = now; \ 45 (_vq)->last_add_time_valid = true; \ 46 } while (0) 47 #define LAST_ADD_TIME_CHECK(_vq) \ 48 do { \ 49 if ((_vq)->last_add_time_valid) { \ 50 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 51 (_vq)->last_add_time)) > 100); \ 52 } \ 53 } while (0) 54 #define LAST_ADD_TIME_INVALID(_vq) \ 55 ((_vq)->last_add_time_valid = false) 56 #else 57 #define BAD_RING(_vq, fmt, args...) \ 58 do { \ 59 dev_err(&_vq->vq.vdev->dev, \ 60 "%s:"fmt, (_vq)->vq.name, ##args); \ 61 (_vq)->broken = true; \ 62 } while (0) 63 #define START_USE(vq) 64 #define END_USE(vq) 65 #define LAST_ADD_TIME_UPDATE(vq) 66 #define LAST_ADD_TIME_CHECK(vq) 67 #define LAST_ADD_TIME_INVALID(vq) 68 #endif 69 70 enum vq_layout { 71 VQ_LAYOUT_SPLIT = 0, 72 VQ_LAYOUT_PACKED, 73 VQ_LAYOUT_SPLIT_IN_ORDER, 74 VQ_LAYOUT_PACKED_IN_ORDER, 75 }; 76 77 struct vring_desc_state_split { 78 void *data; /* Data for callback. */ 79 80 /* Indirect desc table and extra table, if any. These two will be 81 * allocated together. So we won't stress more to the memory allocator. 82 */ 83 struct vring_desc *indir_desc; 84 u32 total_in_len; 85 }; 86 87 struct vring_desc_state_packed { 88 void *data; /* Data for callback. */ 89 90 /* Indirect desc table and extra table, if any. These two will be 91 * allocated together. So we won't stress more to the memory allocator. 92 */ 93 struct vring_packed_desc *indir_desc; 94 u16 num; /* Descriptor list length. */ 95 u16 last; /* The last desc state in a list. */ 96 u32 total_in_len; /* In length for the skipped buffer. */ 97 }; 98 99 struct vring_desc_extra { 100 dma_addr_t addr; /* Descriptor DMA addr. */ 101 u32 len; /* Descriptor length. */ 102 u16 flags; /* Descriptor flags. */ 103 u16 next; /* The next desc state in a list. */ 104 }; 105 106 struct vring_virtqueue_split { 107 /* Actual memory layout for this queue. */ 108 struct vring vring; 109 110 /* Last written value to avail->flags */ 111 u16 avail_flags_shadow; 112 113 /* 114 * Last written value to avail->idx in 115 * guest byte order. 116 */ 117 u16 avail_idx_shadow; 118 119 /* Per-descriptor state. */ 120 struct vring_desc_state_split *desc_state; 121 struct vring_desc_extra *desc_extra; 122 123 /* DMA address and size information */ 124 dma_addr_t queue_dma_addr; 125 size_t queue_size_in_bytes; 126 127 /* 128 * The parameters for creating vrings are reserved for creating new 129 * vring. 130 */ 131 u32 vring_align; 132 bool may_reduce_num; 133 }; 134 135 struct vring_virtqueue_packed { 136 /* Actual memory layout for this queue. */ 137 struct { 138 unsigned int num; 139 struct vring_packed_desc *desc; 140 struct vring_packed_desc_event *driver; 141 struct vring_packed_desc_event *device; 142 } vring; 143 144 /* Driver ring wrap counter. */ 145 bool avail_wrap_counter; 146 147 /* Avail used flags. */ 148 u16 avail_used_flags; 149 150 /* Index of the next avail descriptor. */ 151 u16 next_avail_idx; 152 153 /* 154 * Last written value to driver->flags in 155 * guest byte order. 156 */ 157 u16 event_flags_shadow; 158 159 /* Per-descriptor state. */ 160 struct vring_desc_state_packed *desc_state; 161 struct vring_desc_extra *desc_extra; 162 163 /* DMA address and size information */ 164 dma_addr_t ring_dma_addr; 165 dma_addr_t driver_event_dma_addr; 166 dma_addr_t device_event_dma_addr; 167 size_t ring_size_in_bytes; 168 size_t event_size_in_bytes; 169 }; 170 171 struct vring_virtqueue; 172 173 struct virtqueue_ops { 174 int (*add)(struct vring_virtqueue *vq, struct scatterlist *sgs[], 175 unsigned int total_sg, unsigned int out_sgs, 176 unsigned int in_sgs, void *data, 177 void *ctx, bool premapped, gfp_t gfp, 178 unsigned long attr); 179 void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx); 180 bool (*kick_prepare)(struct vring_virtqueue *vq); 181 void (*disable_cb)(struct vring_virtqueue *vq); 182 bool (*enable_cb_delayed)(struct vring_virtqueue *vq); 183 unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq); 184 bool (*poll)(const struct vring_virtqueue *vq, 185 unsigned int last_used_idx); 186 void *(*detach_unused_buf)(struct vring_virtqueue *vq); 187 bool (*more_used)(const struct vring_virtqueue *vq); 188 int (*resize)(struct vring_virtqueue *vq, u32 num); 189 void (*reset)(struct vring_virtqueue *vq); 190 }; 191 192 struct vring_virtqueue { 193 struct virtqueue vq; 194 195 /* Is DMA API used? */ 196 bool use_map_api; 197 198 /* Can we use weak barriers? */ 199 bool weak_barriers; 200 201 /* Other side has made a mess, don't try any more. */ 202 bool broken; 203 204 /* Host supports indirect buffers */ 205 bool indirect; 206 207 /* Host publishes avail event idx */ 208 bool event; 209 210 enum vq_layout layout; 211 212 /* 213 * Without IN_ORDER it's the head of free buffer list. With 214 * IN_ORDER and SPLIT, it's the next available buffer 215 * index. With IN_ORDER and PACKED, it's unused. 216 */ 217 unsigned int free_head; 218 219 /* 220 * With IN_ORDER, once we see an in-order batch, this stores 221 * this last entry, and until we return the last buffer. 222 * After this, id is set to UINT_MAX to mark it invalid. 223 * Unused without IN_ORDER. 224 */ 225 struct used_entry { 226 u32 id; 227 u32 len; 228 } batch_last; 229 230 /* Number we've added since last sync. */ 231 unsigned int num_added; 232 233 /* Last used index we've seen. 234 * for split ring, it just contains last used index 235 * for packed ring: 236 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. 237 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. 238 */ 239 u16 last_used_idx; 240 241 /* With IN_ORDER and SPLIT, last descriptor id we used to 242 * detach buffer. 243 */ 244 u16 last_used; 245 246 /* Hint for event idx: already triggered no need to disable. */ 247 bool event_triggered; 248 249 union { 250 /* Available for split ring */ 251 struct vring_virtqueue_split split; 252 253 /* Available for packed ring */ 254 struct vring_virtqueue_packed packed; 255 }; 256 257 /* How to notify other side. FIXME: commonalize hcalls! */ 258 bool (*notify)(struct virtqueue *vq); 259 260 /* DMA, allocation, and size information */ 261 bool we_own_ring; 262 263 union virtio_map map; 264 265 #ifdef DEBUG 266 /* They're supposed to lock for us. */ 267 unsigned int in_use; 268 269 /* Figure out if their kicks are too delayed. */ 270 bool last_add_time_valid; 271 ktime_t last_add_time; 272 #endif 273 }; 274 275 /* 276 * Accessors for device-writable fields in virtio rings. 277 * These fields are concurrently written by the device and read by the driver. 278 * Use READ_ONCE() to prevent compiler optimizations, document the 279 * intentional data race and prevent KCSAN warnings. 280 */ 281 static inline u16 vring_read_split_used_idx(const struct vring_virtqueue *vq) 282 { 283 return virtio16_to_cpu(vq->vq.vdev, 284 READ_ONCE(vq->split.vring.used->idx)); 285 } 286 287 static inline u32 vring_read_split_used_id(const struct vring_virtqueue *vq, 288 u16 idx) 289 { 290 return virtio32_to_cpu(vq->vq.vdev, 291 READ_ONCE(vq->split.vring.used->ring[idx].id)); 292 } 293 294 static inline u32 vring_read_split_used_len(const struct vring_virtqueue *vq, u16 idx) 295 { 296 return virtio32_to_cpu(vq->vq.vdev, 297 READ_ONCE(vq->split.vring.used->ring[idx].len)); 298 } 299 300 static inline u16 vring_read_split_avail_event(const struct vring_virtqueue *vq) 301 { 302 return virtio16_to_cpu(vq->vq.vdev, 303 READ_ONCE(vring_avail_event(&vq->split.vring))); 304 } 305 306 static inline u16 vring_read_packed_desc_flags(const struct vring_virtqueue *vq, 307 u16 idx) 308 { 309 return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags)); 310 } 311 312 static inline u16 vring_read_packed_desc_id(const struct vring_virtqueue *vq, 313 u16 idx) 314 { 315 return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id)); 316 } 317 318 static inline u32 vring_read_packed_desc_len(const struct vring_virtqueue *vq, 319 u16 idx) 320 { 321 return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len)); 322 } 323 324 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); 325 static void vring_free(struct virtqueue *_vq); 326 327 /* 328 * Helpers. 329 */ 330 331 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) 332 333 334 static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq) 335 { 336 return vq->layout == VQ_LAYOUT_PACKED || 337 vq->layout == VQ_LAYOUT_PACKED_IN_ORDER; 338 } 339 340 static inline bool virtqueue_is_in_order(const struct vring_virtqueue *vq) 341 { 342 return vq->layout == VQ_LAYOUT_SPLIT_IN_ORDER || 343 vq->layout == VQ_LAYOUT_PACKED_IN_ORDER; 344 } 345 346 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, 347 unsigned int total_sg) 348 { 349 /* 350 * If the host supports indirect descriptor tables, and we have multiple 351 * buffers, then go indirect. FIXME: tune this threshold 352 */ 353 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 354 } 355 356 /* 357 * Modern virtio devices have feature bits to specify whether they need a 358 * quirk and bypass the IOMMU. If not there, just use the DMA API. 359 * 360 * If there, the interaction between virtio and DMA API is messy. 361 * 362 * On most systems with virtio, physical addresses match bus addresses, 363 * and it doesn't particularly matter whether we use the DMA API. 364 * 365 * On some systems, including Xen and any system with a physical device 366 * that speaks virtio behind a physical IOMMU, we must use the DMA API 367 * for virtio DMA to work at all. 368 * 369 * On other systems, including SPARC and PPC64, virtio-pci devices are 370 * enumerated as though they are behind an IOMMU, but the virtio host 371 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 372 * there or somehow map everything as the identity. 373 * 374 * For the time being, we preserve historic behavior and bypass the DMA 375 * API. 376 * 377 * TODO: install a per-device DMA ops structure that does the right thing 378 * taking into account all the above quirks, and use the DMA API 379 * unconditionally on data path. 380 */ 381 382 static bool vring_use_map_api(const struct virtio_device *vdev) 383 { 384 if (!virtio_has_dma_quirk(vdev)) 385 return true; 386 387 /* Otherwise, we are left to guess. */ 388 /* 389 * In theory, it's possible to have a buggy QEMU-supposed 390 * emulated Q35 IOMMU and Xen enabled at the same time. On 391 * such a configuration, virtio has never worked and will 392 * not work without an even larger kludge. Instead, enable 393 * the DMA API if we're a Xen guest, which at least allows 394 * all of the sensible Xen configurations to work correctly. 395 */ 396 if (xen_domain()) 397 return true; 398 399 return false; 400 } 401 402 static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring, 403 const struct vring_desc_extra *extra) 404 { 405 return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR); 406 } 407 408 size_t virtio_max_dma_size(const struct virtio_device *vdev) 409 { 410 size_t max_segment_size = SIZE_MAX; 411 412 if (vring_use_map_api(vdev)) { 413 if (vdev->map) { 414 max_segment_size = 415 vdev->map->max_mapping_size(vdev->vmap); 416 } else 417 max_segment_size = 418 dma_max_mapping_size(vdev->dev.parent); 419 } 420 421 return max_segment_size; 422 } 423 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 424 425 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 426 dma_addr_t *map_handle, gfp_t flag, 427 union virtio_map map) 428 { 429 if (vring_use_map_api(vdev)) { 430 return virtqueue_map_alloc_coherent(vdev, map, size, 431 map_handle, flag); 432 } else { 433 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 434 435 if (queue) { 436 phys_addr_t phys_addr = virt_to_phys(queue); 437 *map_handle = (dma_addr_t)phys_addr; 438 439 /* 440 * Sanity check: make sure we dind't truncate 441 * the address. The only arches I can find that 442 * have 64-bit phys_addr_t but 32-bit dma_addr_t 443 * are certain non-highmem MIPS and x86 444 * configurations, but these configurations 445 * should never allocate physical pages above 32 446 * bits, so this is fine. Just in case, throw a 447 * warning and abort if we end up with an 448 * unrepresentable address. 449 */ 450 if (WARN_ON_ONCE(*map_handle != phys_addr)) { 451 free_pages_exact(queue, PAGE_ALIGN(size)); 452 return NULL; 453 } 454 } 455 return queue; 456 } 457 } 458 459 static void vring_free_queue(struct virtio_device *vdev, size_t size, 460 void *queue, dma_addr_t map_handle, 461 union virtio_map map) 462 { 463 if (vring_use_map_api(vdev)) 464 virtqueue_map_free_coherent(vdev, map, size, 465 queue, map_handle); 466 else 467 free_pages_exact(queue, PAGE_ALIGN(size)); 468 } 469 470 /* 471 * The DMA ops on various arches are rather gnarly right now, and 472 * making all of the arch DMA ops work on the vring device itself 473 * is a mess. 474 */ 475 static struct device *vring_dma_dev(const struct vring_virtqueue *vq) 476 { 477 return vq->map.dma_dev; 478 } 479 480 static int vring_mapping_error(const struct vring_virtqueue *vq, 481 dma_addr_t addr) 482 { 483 struct virtio_device *vdev = vq->vq.vdev; 484 485 if (!vq->use_map_api) 486 return 0; 487 488 if (vdev->map) 489 return vdev->map->mapping_error(vq->map, addr); 490 else 491 return dma_mapping_error(vring_dma_dev(vq), addr); 492 } 493 494 /* Map one sg entry. */ 495 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, 496 enum dma_data_direction direction, dma_addr_t *addr, 497 u32 *len, bool premapped, unsigned long attr) 498 { 499 if (premapped) { 500 *addr = sg_dma_address(sg); 501 *len = sg_dma_len(sg); 502 return 0; 503 } 504 505 *len = sg->length; 506 507 if (!vq->use_map_api) { 508 /* 509 * If DMA is not used, KMSAN doesn't know that the scatterlist 510 * is initialized by the hardware. Explicitly check/unpoison it 511 * depending on the direction. 512 */ 513 kmsan_handle_dma(sg_phys(sg), sg->length, direction); 514 *addr = (dma_addr_t)sg_phys(sg); 515 return 0; 516 } 517 518 /* 519 * We can't use dma_map_sg, because we don't use scatterlists in 520 * the way it expects (we don't guarantee that the scatterlist 521 * will exist for the lifetime of the mapping). 522 */ 523 *addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg), 524 sg->offset, sg->length, 525 direction, attr); 526 527 if (vring_mapping_error(vq, *addr)) 528 return -ENOMEM; 529 530 return 0; 531 } 532 533 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 534 void *cpu_addr, size_t size, 535 enum dma_data_direction direction) 536 { 537 if (!vq->use_map_api) 538 return (dma_addr_t)virt_to_phys(cpu_addr); 539 540 return virtqueue_map_single_attrs(&vq->vq, cpu_addr, 541 size, direction, 0); 542 } 543 544 static void virtqueue_init(struct vring_virtqueue *vq, u32 num) 545 { 546 vq->vq.num_free = num; 547 548 if (virtqueue_is_packed(vq)) 549 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 550 else 551 vq->last_used_idx = 0; 552 553 vq->last_used = 0; 554 555 vq->event_triggered = false; 556 vq->num_added = 0; 557 558 #ifdef DEBUG 559 vq->in_use = false; 560 vq->last_add_time_valid = false; 561 #endif 562 } 563 564 565 /* 566 * Split ring specific functions - *_split(). 567 */ 568 569 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 570 struct vring_desc_extra *extra) 571 { 572 u16 flags; 573 574 flags = extra->flags; 575 576 if (flags & VRING_DESC_F_INDIRECT) { 577 if (!vq->use_map_api) 578 goto out; 579 } else if (!vring_need_unmap_buffer(vq, extra)) 580 goto out; 581 582 virtqueue_unmap_page_attrs(&vq->vq, 583 extra->addr, 584 extra->len, 585 (flags & VRING_DESC_F_WRITE) ? 586 DMA_FROM_DEVICE : DMA_TO_DEVICE, 587 0); 588 589 out: 590 return extra->next; 591 } 592 593 static struct vring_desc *alloc_indirect_split(struct vring_virtqueue *vq, 594 unsigned int total_sg, 595 gfp_t gfp) 596 { 597 struct vring_desc_extra *extra; 598 struct vring_desc *desc; 599 unsigned int i, size; 600 601 /* 602 * We require lowmem mappings for the descriptors because 603 * otherwise virt_to_phys will give us bogus addresses in the 604 * virtqueue. 605 */ 606 gfp &= ~__GFP_HIGHMEM; 607 608 size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg; 609 610 desc = kmalloc(size, gfp); 611 if (!desc) 612 return NULL; 613 614 extra = (struct vring_desc_extra *)&desc[total_sg]; 615 616 for (i = 0; i < total_sg; i++) 617 extra[i].next = i + 1; 618 619 return desc; 620 } 621 622 static inline unsigned int virtqueue_add_desc_split(struct vring_virtqueue *vq, 623 struct vring_desc *desc, 624 struct vring_desc_extra *extra, 625 unsigned int i, 626 dma_addr_t addr, 627 unsigned int len, 628 u16 flags, bool premapped) 629 { 630 struct virtio_device *vdev = vq->vq.vdev; 631 u16 next; 632 633 desc[i].flags = cpu_to_virtio16(vdev, flags); 634 desc[i].addr = cpu_to_virtio64(vdev, addr); 635 desc[i].len = cpu_to_virtio32(vdev, len); 636 637 extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; 638 extra[i].len = len; 639 extra[i].flags = flags; 640 641 next = extra[i].next; 642 643 desc[i].next = cpu_to_virtio16(vdev, next); 644 645 return next; 646 } 647 648 static inline int virtqueue_add_split(struct vring_virtqueue *vq, 649 struct scatterlist *sgs[], 650 unsigned int total_sg, 651 unsigned int out_sgs, 652 unsigned int in_sgs, 653 void *data, 654 void *ctx, 655 bool premapped, 656 gfp_t gfp, 657 unsigned long attr) 658 { 659 struct vring_desc_extra *extra; 660 struct scatterlist *sg; 661 struct vring_desc *desc; 662 unsigned int i, n, avail, descs_used, err_idx, sg_count = 0; 663 /* Total length for in-order */ 664 unsigned int total_in_len = 0; 665 int head; 666 bool indirect; 667 668 START_USE(vq); 669 670 BUG_ON(data == NULL); 671 BUG_ON(ctx && vq->indirect); 672 673 if (unlikely(vq->broken)) { 674 END_USE(vq); 675 return -EIO; 676 } 677 678 LAST_ADD_TIME_UPDATE(vq); 679 680 BUG_ON(total_sg == 0); 681 682 head = vq->free_head; 683 684 if (virtqueue_use_indirect(vq, total_sg)) 685 desc = alloc_indirect_split(vq, total_sg, gfp); 686 else { 687 desc = NULL; 688 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 689 } 690 691 if (desc) { 692 /* Use a single buffer which doesn't continue */ 693 indirect = true; 694 /* Set up rest to use this indirect table. */ 695 i = 0; 696 descs_used = 1; 697 extra = (struct vring_desc_extra *)&desc[total_sg]; 698 } else { 699 indirect = false; 700 desc = vq->split.vring.desc; 701 extra = vq->split.desc_extra; 702 i = head; 703 descs_used = total_sg; 704 } 705 706 if (unlikely(vq->vq.num_free < descs_used)) { 707 pr_debug("Can't add buf len %i - avail = %i\n", 708 descs_used, vq->vq.num_free); 709 /* FIXME: for historical reasons, we force a notify here if 710 * there are outgoing parts to the buffer. Presumably the 711 * host should service the ring ASAP. */ 712 if (out_sgs) 713 vq->notify(&vq->vq); 714 if (indirect) 715 kfree(desc); 716 END_USE(vq); 717 return -ENOSPC; 718 } 719 720 for (n = 0; n < out_sgs; n++) { 721 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 722 dma_addr_t addr; 723 u32 len; 724 u16 flags = 0; 725 726 if (++sg_count != total_sg) 727 flags |= VRING_DESC_F_NEXT; 728 729 if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len, 730 premapped, attr)) 731 goto unmap_release; 732 733 /* Note that we trust indirect descriptor 734 * table since it use stream DMA mapping. 735 */ 736 i = virtqueue_add_desc_split(vq, desc, extra, i, addr, 737 len, flags, premapped); 738 } 739 } 740 for (; n < (out_sgs + in_sgs); n++) { 741 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 742 dma_addr_t addr; 743 u32 len; 744 u16 flags = VRING_DESC_F_WRITE; 745 746 if (++sg_count != total_sg) 747 flags |= VRING_DESC_F_NEXT; 748 749 if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len, 750 premapped, attr)) 751 goto unmap_release; 752 753 /* Note that we trust indirect descriptor 754 * table since it use stream DMA mapping. 755 */ 756 i = virtqueue_add_desc_split(vq, desc, extra, i, addr, 757 len, flags, premapped); 758 total_in_len += len; 759 } 760 } 761 762 if (indirect) { 763 /* Now that the indirect table is filled in, map it. */ 764 dma_addr_t addr = vring_map_single( 765 vq, desc, total_sg * sizeof(struct vring_desc), 766 DMA_TO_DEVICE); 767 if (vring_mapping_error(vq, addr)) 768 goto unmap_release; 769 770 virtqueue_add_desc_split(vq, vq->split.vring.desc, 771 vq->split.desc_extra, 772 head, addr, 773 total_sg * sizeof(struct vring_desc), 774 VRING_DESC_F_INDIRECT, false); 775 } 776 777 /* We're using some buffers from the free list. */ 778 vq->vq.num_free -= descs_used; 779 780 /* Update free pointer */ 781 if (virtqueue_is_in_order(vq)) { 782 vq->free_head += descs_used; 783 if (vq->free_head >= vq->split.vring.num) 784 vq->free_head -= vq->split.vring.num; 785 vq->split.desc_state[head].total_in_len = total_in_len; 786 } else if (indirect) 787 vq->free_head = vq->split.desc_extra[head].next; 788 else 789 vq->free_head = i; 790 791 /* Store token and indirect buffer state. */ 792 vq->split.desc_state[head].data = data; 793 if (indirect) 794 vq->split.desc_state[head].indir_desc = desc; 795 else 796 vq->split.desc_state[head].indir_desc = ctx; 797 798 /* Put entry in available array (but don't update avail->idx until they 799 * do sync). */ 800 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 801 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(vq->vq.vdev, head); 802 803 /* Descriptors and available array need to be set before we expose the 804 * new available array entries. */ 805 virtio_wmb(vq->weak_barriers); 806 vq->split.avail_idx_shadow++; 807 vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev, 808 vq->split.avail_idx_shadow); 809 vq->num_added++; 810 811 pr_debug("Added buffer head %i to %p\n", head, vq); 812 END_USE(vq); 813 814 /* This is very unlikely, but theoretically possible. Kick 815 * just in case. */ 816 if (unlikely(vq->num_added == (1 << 16) - 1)) 817 virtqueue_kick(&vq->vq); 818 819 return 0; 820 821 unmap_release: 822 err_idx = i; 823 824 if (indirect) 825 i = 0; 826 else 827 i = head; 828 829 for (n = 0; n < total_sg; n++) { 830 if (i == err_idx) 831 break; 832 833 i = vring_unmap_one_split(vq, &extra[i]); 834 } 835 836 if (indirect) 837 kfree(desc); 838 839 END_USE(vq); 840 return -ENOMEM; 841 } 842 843 static bool virtqueue_kick_prepare_split(struct vring_virtqueue *vq) 844 { 845 u16 new, old; 846 bool needs_kick; 847 848 START_USE(vq); 849 /* We need to expose available array entries before checking avail 850 * event. */ 851 virtio_mb(vq->weak_barriers); 852 853 old = vq->split.avail_idx_shadow - vq->num_added; 854 new = vq->split.avail_idx_shadow; 855 vq->num_added = 0; 856 857 LAST_ADD_TIME_CHECK(vq); 858 LAST_ADD_TIME_INVALID(vq); 859 860 if (vq->event) { 861 needs_kick = vring_need_event(vring_read_split_avail_event(vq), 862 new, old); 863 } else { 864 needs_kick = !(vq->split.vring.used->flags & 865 cpu_to_virtio16(vq->vq.vdev, 866 VRING_USED_F_NO_NOTIFY)); 867 } 868 END_USE(vq); 869 return needs_kick; 870 } 871 872 static void detach_indirect_split(struct vring_virtqueue *vq, 873 unsigned int head) 874 { 875 struct vring_desc_extra *extra = vq->split.desc_extra; 876 struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc; 877 unsigned int j; 878 u32 len, num; 879 880 /* Free the indirect table, if any, now that it's unmapped. */ 881 if (!indir_desc) 882 return; 883 len = vq->split.desc_extra[head].len; 884 885 BUG_ON(!(vq->split.desc_extra[head].flags & 886 VRING_DESC_F_INDIRECT)); 887 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 888 889 num = len / sizeof(struct vring_desc); 890 891 extra = (struct vring_desc_extra *)&indir_desc[num]; 892 893 if (vq->use_map_api) { 894 for (j = 0; j < num; j++) 895 vring_unmap_one_split(vq, &extra[j]); 896 } 897 898 kfree(indir_desc); 899 vq->split.desc_state[head].indir_desc = NULL; 900 } 901 902 static unsigned detach_buf_split_in_order(struct vring_virtqueue *vq, 903 unsigned int head, 904 void **ctx) 905 { 906 struct vring_desc_extra *extra; 907 unsigned int i; 908 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 909 910 /* Clear data ptr. */ 911 vq->split.desc_state[head].data = NULL; 912 913 extra = vq->split.desc_extra; 914 915 /* Put back on free list: unmap first-level descriptors and find end */ 916 i = head; 917 918 while (vq->split.vring.desc[i].flags & nextflag) { 919 i = vring_unmap_one_split(vq, &extra[i]); 920 vq->vq.num_free++; 921 } 922 923 vring_unmap_one_split(vq, &extra[i]); 924 925 /* Plus final descriptor */ 926 vq->vq.num_free++; 927 928 if (vq->indirect) 929 detach_indirect_split(vq, head); 930 else if (ctx) 931 *ctx = vq->split.desc_state[head].indir_desc; 932 933 return i; 934 } 935 936 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 937 void **ctx) 938 { 939 unsigned int i = detach_buf_split_in_order(vq, head, ctx); 940 941 vq->split.desc_extra[i].next = vq->free_head; 942 vq->free_head = head; 943 } 944 945 static bool virtqueue_poll_split(const struct vring_virtqueue *vq, 946 unsigned int last_used_idx) 947 { 948 return (u16)last_used_idx != vring_read_split_used_idx(vq); 949 } 950 951 static bool more_used_split(const struct vring_virtqueue *vq) 952 { 953 return virtqueue_poll_split(vq, vq->last_used_idx); 954 } 955 956 static bool more_used_split_in_order(const struct vring_virtqueue *vq) 957 { 958 if (vq->batch_last.id != UINT_MAX) 959 return true; 960 961 return virtqueue_poll_split(vq, vq->last_used_idx); 962 } 963 964 static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq, 965 unsigned int *len, 966 void **ctx) 967 { 968 void *ret; 969 unsigned int i; 970 u16 last_used; 971 972 START_USE(vq); 973 974 if (unlikely(vq->broken)) { 975 END_USE(vq); 976 return NULL; 977 } 978 979 if (!more_used_split(vq)) { 980 pr_debug("No more buffers in queue\n"); 981 END_USE(vq); 982 return NULL; 983 } 984 985 /* Only get used array entries after they have been exposed by host. */ 986 virtio_rmb(vq->weak_barriers); 987 988 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 989 i = vring_read_split_used_id(vq, last_used); 990 *len = vring_read_split_used_len(vq, last_used); 991 992 if (unlikely(i >= vq->split.vring.num)) { 993 BAD_RING(vq, "id %u out of range\n", i); 994 return NULL; 995 } 996 if (unlikely(!vq->split.desc_state[i].data)) { 997 BAD_RING(vq, "id %u is not a head!\n", i); 998 return NULL; 999 } 1000 1001 /* detach_buf_split clears data, so grab it now. */ 1002 ret = vq->split.desc_state[i].data; 1003 detach_buf_split(vq, i, ctx); 1004 vq->last_used_idx++; 1005 /* If we expect an interrupt for the next entry, tell host 1006 * by writing event index and flush out the write before 1007 * the read in the next get_buf call. */ 1008 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 1009 virtio_store_mb(vq->weak_barriers, 1010 &vring_used_event(&vq->split.vring), 1011 cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx)); 1012 1013 LAST_ADD_TIME_INVALID(vq); 1014 1015 END_USE(vq); 1016 return ret; 1017 } 1018 1019 static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq, 1020 unsigned int *len, 1021 void **ctx) 1022 { 1023 void *ret; 1024 unsigned int num = vq->split.vring.num; 1025 unsigned int num_free = vq->vq.num_free; 1026 u16 last_used, last_used_idx; 1027 1028 START_USE(vq); 1029 1030 if (unlikely(vq->broken)) { 1031 END_USE(vq); 1032 return NULL; 1033 } 1034 1035 last_used = vq->last_used & (num - 1); 1036 last_used_idx = vq->last_used_idx & (num - 1); 1037 1038 if (vq->batch_last.id == UINT_MAX) { 1039 if (!more_used_split_in_order(vq)) { 1040 pr_debug("No more buffers in queue\n"); 1041 END_USE(vq); 1042 return NULL; 1043 } 1044 1045 /* 1046 * Only get used array entries after they have been 1047 * exposed by host. 1048 */ 1049 virtio_rmb(vq->weak_barriers); 1050 1051 vq->batch_last.id = vring_read_split_used_id(vq, last_used_idx); 1052 vq->batch_last.len = vring_read_split_used_len(vq, last_used_idx); 1053 } 1054 1055 if (vq->batch_last.id == last_used) { 1056 vq->batch_last.id = UINT_MAX; 1057 *len = vq->batch_last.len; 1058 } else { 1059 *len = vq->split.desc_state[last_used].total_in_len; 1060 } 1061 1062 if (unlikely(!vq->split.desc_state[last_used].data)) { 1063 BAD_RING(vq, "id %u is not a head!\n", last_used); 1064 return NULL; 1065 } 1066 1067 /* detach_buf_split clears data, so grab it now. */ 1068 ret = vq->split.desc_state[last_used].data; 1069 detach_buf_split_in_order(vq, last_used, ctx); 1070 1071 vq->last_used_idx++; 1072 vq->last_used += (vq->vq.num_free - num_free); 1073 /* If we expect an interrupt for the next entry, tell host 1074 * by writing event index and flush out the write before 1075 * the read in the next get_buf call. */ 1076 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 1077 virtio_store_mb(vq->weak_barriers, 1078 &vring_used_event(&vq->split.vring), 1079 cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx)); 1080 1081 LAST_ADD_TIME_INVALID(vq); 1082 1083 END_USE(vq); 1084 return ret; 1085 } 1086 1087 static void virtqueue_disable_cb_split(struct vring_virtqueue *vq) 1088 { 1089 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 1090 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 1091 1092 /* 1093 * If device triggered an event already it won't trigger one again: 1094 * no need to disable. 1095 */ 1096 if (vq->event_triggered) 1097 return; 1098 1099 if (vq->event) 1100 /* TODO: this is a hack. Figure out a cleaner value to write. */ 1101 vring_used_event(&vq->split.vring) = 0x0; 1102 else 1103 vq->split.vring.avail->flags = 1104 cpu_to_virtio16(vq->vq.vdev, 1105 vq->split.avail_flags_shadow); 1106 } 1107 } 1108 1109 static unsigned int virtqueue_enable_cb_prepare_split(struct vring_virtqueue *vq) 1110 { 1111 u16 last_used_idx; 1112 1113 START_USE(vq); 1114 1115 /* We optimistically turn back on interrupts, then check if there was 1116 * more to do. */ 1117 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 1118 * either clear the flags bit or point the event index at the next 1119 * entry. Always do both to keep code simple. */ 1120 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 1121 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 1122 if (!vq->event) 1123 vq->split.vring.avail->flags = 1124 cpu_to_virtio16(vq->vq.vdev, 1125 vq->split.avail_flags_shadow); 1126 } 1127 vring_used_event(&vq->split.vring) = cpu_to_virtio16(vq->vq.vdev, 1128 last_used_idx = vq->last_used_idx); 1129 END_USE(vq); 1130 return last_used_idx; 1131 } 1132 1133 static bool virtqueue_enable_cb_delayed_split(struct vring_virtqueue *vq) 1134 { 1135 u16 bufs; 1136 1137 START_USE(vq); 1138 1139 /* We optimistically turn back on interrupts, then check if there was 1140 * more to do. */ 1141 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 1142 * either clear the flags bit or point the event index at the next 1143 * entry. Always update the event index to keep code simple. */ 1144 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 1145 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 1146 if (!vq->event) 1147 vq->split.vring.avail->flags = 1148 cpu_to_virtio16(vq->vq.vdev, 1149 vq->split.avail_flags_shadow); 1150 } 1151 /* TODO: tune this threshold */ 1152 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 1153 1154 virtio_store_mb(vq->weak_barriers, 1155 &vring_used_event(&vq->split.vring), 1156 cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx + bufs)); 1157 1158 if (unlikely((u16)(vring_read_split_used_idx(vq) 1159 - vq->last_used_idx) > bufs)) { 1160 END_USE(vq); 1161 return false; 1162 } 1163 1164 END_USE(vq); 1165 return true; 1166 } 1167 1168 static void *virtqueue_detach_unused_buf_split(struct vring_virtqueue *vq) 1169 { 1170 unsigned int i; 1171 void *buf; 1172 1173 START_USE(vq); 1174 1175 for (i = 0; i < vq->split.vring.num; i++) { 1176 if (!vq->split.desc_state[i].data) 1177 continue; 1178 /* detach_buf_split clears data, so grab it now. */ 1179 buf = vq->split.desc_state[i].data; 1180 if (virtqueue_is_in_order(vq)) 1181 detach_buf_split_in_order(vq, i, NULL); 1182 else 1183 detach_buf_split(vq, i, NULL); 1184 vq->split.avail_idx_shadow--; 1185 vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev, 1186 vq->split.avail_idx_shadow); 1187 END_USE(vq); 1188 return buf; 1189 } 1190 /* That should have freed everything. */ 1191 BUG_ON(vq->vq.num_free != vq->split.vring.num); 1192 1193 END_USE(vq); 1194 return NULL; 1195 } 1196 1197 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, 1198 struct vring_virtqueue *vq) 1199 { 1200 struct virtio_device *vdev; 1201 1202 vdev = vq->vq.vdev; 1203 1204 vring_split->avail_flags_shadow = 0; 1205 vring_split->avail_idx_shadow = 0; 1206 1207 /* No callback? Tell other side not to bother us. */ 1208 if (!vq->vq.callback) { 1209 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 1210 if (!vq->event) 1211 vring_split->vring.avail->flags = cpu_to_virtio16(vdev, 1212 vring_split->avail_flags_shadow); 1213 } 1214 } 1215 1216 static void virtqueue_reset_split(struct vring_virtqueue *vq) 1217 { 1218 int num; 1219 1220 num = vq->split.vring.num; 1221 1222 vq->split.vring.avail->flags = 0; 1223 vq->split.vring.avail->idx = 0; 1224 1225 /* reset avail event */ 1226 vq->split.vring.avail->ring[num] = 0; 1227 1228 vq->split.vring.used->flags = 0; 1229 vq->split.vring.used->idx = 0; 1230 1231 /* reset used event */ 1232 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; 1233 1234 virtqueue_init(vq, num); 1235 1236 virtqueue_vring_init_split(&vq->split, vq); 1237 } 1238 1239 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, 1240 struct vring_virtqueue_split *vring_split) 1241 { 1242 vq->split = *vring_split; 1243 1244 /* Put everything in free lists. */ 1245 vq->free_head = 0; 1246 vq->batch_last.id = UINT_MAX; 1247 } 1248 1249 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) 1250 { 1251 struct vring_desc_state_split *state; 1252 struct vring_desc_extra *extra; 1253 u32 num = vring_split->vring.num; 1254 1255 state = kmalloc_objs(struct vring_desc_state_split, num); 1256 if (!state) 1257 goto err_state; 1258 1259 extra = vring_alloc_desc_extra(num); 1260 if (!extra) 1261 goto err_extra; 1262 1263 memset(state, 0, num * sizeof(struct vring_desc_state_split)); 1264 1265 vring_split->desc_state = state; 1266 vring_split->desc_extra = extra; 1267 return 0; 1268 1269 err_extra: 1270 kfree(state); 1271 err_state: 1272 return -ENOMEM; 1273 } 1274 1275 static void vring_free_split(struct vring_virtqueue_split *vring_split, 1276 struct virtio_device *vdev, 1277 union virtio_map map) 1278 { 1279 vring_free_queue(vdev, vring_split->queue_size_in_bytes, 1280 vring_split->vring.desc, 1281 vring_split->queue_dma_addr, 1282 map); 1283 1284 kfree(vring_split->desc_state); 1285 kfree(vring_split->desc_extra); 1286 } 1287 1288 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, 1289 struct virtio_device *vdev, 1290 u32 num, 1291 unsigned int vring_align, 1292 bool may_reduce_num, 1293 union virtio_map map) 1294 { 1295 void *queue = NULL; 1296 dma_addr_t dma_addr; 1297 1298 /* We assume num is a power of 2. */ 1299 if (!is_power_of_2(num)) { 1300 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1301 return -EINVAL; 1302 } 1303 1304 /* TODO: allocate each queue chunk individually */ 1305 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1306 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1307 &dma_addr, 1308 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1309 map); 1310 if (queue) 1311 break; 1312 if (!may_reduce_num) 1313 return -ENOMEM; 1314 } 1315 1316 if (!num) 1317 return -ENOMEM; 1318 1319 if (!queue) { 1320 /* Try to get a single page. You are my only hope! */ 1321 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1322 &dma_addr, GFP_KERNEL | __GFP_ZERO, 1323 map); 1324 } 1325 if (!queue) 1326 return -ENOMEM; 1327 1328 vring_init(&vring_split->vring, num, queue, vring_align); 1329 1330 vring_split->queue_dma_addr = dma_addr; 1331 vring_split->queue_size_in_bytes = vring_size(num, vring_align); 1332 1333 vring_split->vring_align = vring_align; 1334 vring_split->may_reduce_num = may_reduce_num; 1335 1336 return 0; 1337 } 1338 1339 static const struct virtqueue_ops split_ops; 1340 1341 static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, 1342 struct vring_virtqueue_split *vring_split, 1343 struct virtio_device *vdev, 1344 bool weak_barriers, 1345 bool context, 1346 bool (*notify)(struct virtqueue *), 1347 void (*callback)(struct virtqueue *), 1348 const char *name, 1349 union virtio_map map) 1350 { 1351 struct vring_virtqueue *vq; 1352 int err; 1353 1354 vq = kmalloc_obj(*vq); 1355 if (!vq) 1356 return NULL; 1357 1358 vq->vq.callback = callback; 1359 vq->vq.vdev = vdev; 1360 vq->vq.name = name; 1361 vq->vq.index = index; 1362 vq->vq.reset = false; 1363 vq->we_own_ring = false; 1364 vq->notify = notify; 1365 vq->weak_barriers = weak_barriers; 1366 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 1367 vq->broken = true; 1368 #else 1369 vq->broken = false; 1370 #endif 1371 vq->map = map; 1372 vq->use_map_api = vring_use_map_api(vdev); 1373 1374 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1375 !context; 1376 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1377 vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ? 1378 VQ_LAYOUT_SPLIT_IN_ORDER : VQ_LAYOUT_SPLIT; 1379 1380 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1381 vq->weak_barriers = false; 1382 1383 err = vring_alloc_state_extra_split(vring_split); 1384 if (err) { 1385 kfree(vq); 1386 return NULL; 1387 } 1388 1389 virtqueue_vring_init_split(vring_split, vq); 1390 1391 virtqueue_init(vq, vring_split->vring.num); 1392 virtqueue_vring_attach_split(vq, vring_split); 1393 1394 spin_lock(&vdev->vqs_list_lock); 1395 list_add_tail(&vq->vq.list, &vdev->vqs); 1396 spin_unlock(&vdev->vqs_list_lock); 1397 return &vq->vq; 1398 } 1399 1400 static struct virtqueue *vring_create_virtqueue_split( 1401 unsigned int index, 1402 unsigned int num, 1403 unsigned int vring_align, 1404 struct virtio_device *vdev, 1405 bool weak_barriers, 1406 bool may_reduce_num, 1407 bool context, 1408 bool (*notify)(struct virtqueue *), 1409 void (*callback)(struct virtqueue *), 1410 const char *name, 1411 union virtio_map map) 1412 { 1413 struct vring_virtqueue_split vring_split = {}; 1414 struct virtqueue *vq; 1415 int err; 1416 1417 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, 1418 may_reduce_num, map); 1419 if (err) 1420 return NULL; 1421 1422 vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, 1423 context, notify, callback, name, map); 1424 if (!vq) { 1425 vring_free_split(&vring_split, vdev, map); 1426 return NULL; 1427 } 1428 1429 to_vvq(vq)->we_own_ring = true; 1430 1431 return vq; 1432 } 1433 1434 static int virtqueue_resize_split(struct vring_virtqueue *vq, u32 num) 1435 { 1436 struct vring_virtqueue_split vring_split = {}; 1437 struct virtio_device *vdev = vq->vq.vdev; 1438 int err; 1439 1440 err = vring_alloc_queue_split(&vring_split, vdev, num, 1441 vq->split.vring_align, 1442 vq->split.may_reduce_num, 1443 vq->map); 1444 if (err) 1445 goto err; 1446 1447 err = vring_alloc_state_extra_split(&vring_split); 1448 if (err) 1449 goto err_state_extra; 1450 1451 vring_free(&vq->vq); 1452 1453 virtqueue_vring_init_split(&vring_split, vq); 1454 1455 virtqueue_init(vq, vring_split.vring.num); 1456 virtqueue_vring_attach_split(vq, &vring_split); 1457 1458 return 0; 1459 1460 err_state_extra: 1461 vring_free_split(&vring_split, vdev, vq->map); 1462 err: 1463 virtqueue_reset_split(vq); 1464 return -ENOMEM; 1465 } 1466 1467 1468 /* 1469 * Packed ring specific functions - *_packed(). 1470 */ 1471 static bool packed_used_wrap_counter(u16 last_used_idx) 1472 { 1473 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1474 } 1475 1476 static u16 packed_last_used(u16 last_used_idx) 1477 { 1478 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1479 } 1480 1481 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1482 const struct vring_desc_extra *extra) 1483 { 1484 u16 flags; 1485 1486 flags = extra->flags; 1487 1488 if (flags & VRING_DESC_F_INDIRECT) { 1489 if (!vq->use_map_api) 1490 return; 1491 } else if (!vring_need_unmap_buffer(vq, extra)) 1492 return; 1493 1494 virtqueue_unmap_page_attrs(&vq->vq, 1495 extra->addr, extra->len, 1496 (flags & VRING_DESC_F_WRITE) ? 1497 DMA_FROM_DEVICE : DMA_TO_DEVICE, 1498 0); 1499 } 1500 1501 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1502 gfp_t gfp) 1503 { 1504 struct vring_desc_extra *extra; 1505 struct vring_packed_desc *desc; 1506 int i, size; 1507 1508 /* 1509 * We require lowmem mappings for the descriptors because 1510 * otherwise virt_to_phys will give us bogus addresses in the 1511 * virtqueue. 1512 */ 1513 gfp &= ~__GFP_HIGHMEM; 1514 1515 size = (sizeof(*desc) + sizeof(*extra)) * total_sg; 1516 1517 desc = kmalloc(size, gfp); 1518 if (!desc) 1519 return NULL; 1520 1521 extra = (struct vring_desc_extra *)&desc[total_sg]; 1522 1523 for (i = 0; i < total_sg; i++) 1524 extra[i].next = i + 1; 1525 1526 return desc; 1527 } 1528 1529 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1530 struct scatterlist *sgs[], 1531 unsigned int total_sg, 1532 unsigned int out_sgs, 1533 unsigned int in_sgs, 1534 void *data, 1535 bool premapped, 1536 gfp_t gfp, 1537 u16 id, 1538 unsigned long attr) 1539 { 1540 struct vring_desc_extra *extra; 1541 struct vring_packed_desc *desc; 1542 struct scatterlist *sg; 1543 unsigned int i, n, err_idx, len, total_in_len = 0; 1544 u16 head; 1545 dma_addr_t addr; 1546 1547 head = vq->packed.next_avail_idx; 1548 desc = alloc_indirect_packed(total_sg, gfp); 1549 if (!desc) 1550 return -ENOMEM; 1551 1552 extra = (struct vring_desc_extra *)&desc[total_sg]; 1553 1554 if (unlikely(vq->vq.num_free < 1)) { 1555 pr_debug("Can't add buf len 1 - avail = 0\n"); 1556 kfree(desc); 1557 END_USE(vq); 1558 return -ENOSPC; 1559 } 1560 1561 i = 0; 1562 1563 for (n = 0; n < out_sgs + in_sgs; n++) { 1564 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1565 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1566 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1567 &addr, &len, premapped, attr)) 1568 goto unmap_release; 1569 1570 desc[i].flags = cpu_to_le16(n < out_sgs ? 1571 0 : VRING_DESC_F_WRITE); 1572 desc[i].addr = cpu_to_le64(addr); 1573 desc[i].len = cpu_to_le32(len); 1574 1575 if (unlikely(vq->use_map_api)) { 1576 extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; 1577 extra[i].len = len; 1578 extra[i].flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE; 1579 } 1580 1581 if (n >= out_sgs) 1582 total_in_len += len; 1583 i++; 1584 } 1585 } 1586 1587 /* Now that the indirect table is filled in, map it. */ 1588 addr = vring_map_single(vq, desc, 1589 total_sg * sizeof(struct vring_packed_desc), 1590 DMA_TO_DEVICE); 1591 if (vring_mapping_error(vq, addr)) 1592 goto unmap_release; 1593 1594 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1595 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1596 sizeof(struct vring_packed_desc)); 1597 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1598 1599 if (vq->use_map_api) { 1600 vq->packed.desc_extra[id].addr = addr; 1601 vq->packed.desc_extra[id].len = total_sg * 1602 sizeof(struct vring_packed_desc); 1603 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1604 vq->packed.avail_used_flags; 1605 } 1606 1607 /* 1608 * A driver MUST NOT make the first descriptor in the list 1609 * available before all subsequent descriptors comprising 1610 * the list are made available. 1611 */ 1612 virtio_wmb(vq->weak_barriers); 1613 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1614 vq->packed.avail_used_flags); 1615 1616 /* We're using some buffers from the free list. */ 1617 vq->vq.num_free -= 1; 1618 1619 /* Update free pointer */ 1620 n = head + 1; 1621 if (n >= vq->packed.vring.num) { 1622 n = 0; 1623 vq->packed.avail_wrap_counter ^= 1; 1624 vq->packed.avail_used_flags ^= 1625 1 << VRING_PACKED_DESC_F_AVAIL | 1626 1 << VRING_PACKED_DESC_F_USED; 1627 } 1628 vq->packed.next_avail_idx = n; 1629 if (!virtqueue_is_in_order(vq)) 1630 vq->free_head = vq->packed.desc_extra[id].next; 1631 1632 /* Store token and indirect buffer state. */ 1633 vq->packed.desc_state[id].num = 1; 1634 vq->packed.desc_state[id].data = data; 1635 vq->packed.desc_state[id].indir_desc = desc; 1636 vq->packed.desc_state[id].last = id; 1637 vq->packed.desc_state[id].total_in_len = total_in_len; 1638 1639 vq->num_added += 1; 1640 1641 pr_debug("Added buffer head %i to %p\n", head, vq); 1642 END_USE(vq); 1643 1644 return 0; 1645 1646 unmap_release: 1647 err_idx = i; 1648 1649 for (i = 0; i < err_idx; i++) 1650 vring_unmap_extra_packed(vq, &extra[i]); 1651 1652 kfree(desc); 1653 1654 END_USE(vq); 1655 return -ENOMEM; 1656 } 1657 1658 static inline int virtqueue_add_packed(struct vring_virtqueue *vq, 1659 struct scatterlist *sgs[], 1660 unsigned int total_sg, 1661 unsigned int out_sgs, 1662 unsigned int in_sgs, 1663 void *data, 1664 void *ctx, 1665 bool premapped, 1666 gfp_t gfp, 1667 unsigned long attr) 1668 { 1669 struct vring_packed_desc *desc; 1670 struct scatterlist *sg; 1671 unsigned int i, n, c, descs_used, err_idx, len; 1672 __le16 head_flags, flags; 1673 u16 head, id, prev, curr, avail_used_flags; 1674 int err; 1675 1676 START_USE(vq); 1677 1678 BUG_ON(data == NULL); 1679 BUG_ON(ctx && vq->indirect); 1680 1681 if (unlikely(vq->broken)) { 1682 END_USE(vq); 1683 return -EIO; 1684 } 1685 1686 LAST_ADD_TIME_UPDATE(vq); 1687 1688 BUG_ON(total_sg == 0); 1689 1690 if (virtqueue_use_indirect(vq, total_sg)) { 1691 id = vq->free_head; 1692 BUG_ON(id == vq->packed.vring.num); 1693 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1694 in_sgs, data, premapped, gfp, 1695 id, attr); 1696 if (err != -ENOMEM) { 1697 END_USE(vq); 1698 return err; 1699 } 1700 1701 /* fall back on direct */ 1702 } 1703 1704 head = vq->packed.next_avail_idx; 1705 avail_used_flags = vq->packed.avail_used_flags; 1706 1707 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1708 1709 desc = vq->packed.vring.desc; 1710 i = head; 1711 descs_used = total_sg; 1712 1713 if (unlikely(vq->vq.num_free < descs_used)) { 1714 pr_debug("Can't add buf len %i - avail = %i\n", 1715 descs_used, vq->vq.num_free); 1716 END_USE(vq); 1717 return -ENOSPC; 1718 } 1719 1720 id = vq->free_head; 1721 BUG_ON(id == vq->packed.vring.num); 1722 1723 curr = id; 1724 c = 0; 1725 for (n = 0; n < out_sgs + in_sgs; n++) { 1726 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1727 dma_addr_t addr; 1728 1729 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1730 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1731 &addr, &len, premapped, attr)) 1732 goto unmap_release; 1733 1734 flags = cpu_to_le16(vq->packed.avail_used_flags | 1735 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1736 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1737 if (i == head) 1738 head_flags = flags; 1739 else 1740 desc[i].flags = flags; 1741 1742 desc[i].addr = cpu_to_le64(addr); 1743 desc[i].len = cpu_to_le32(len); 1744 desc[i].id = cpu_to_le16(id); 1745 1746 if (unlikely(vq->use_map_api)) { 1747 vq->packed.desc_extra[curr].addr = premapped ? 1748 DMA_MAPPING_ERROR : addr; 1749 vq->packed.desc_extra[curr].len = len; 1750 vq->packed.desc_extra[curr].flags = 1751 le16_to_cpu(flags); 1752 } 1753 prev = curr; 1754 curr = vq->packed.desc_extra[curr].next; 1755 1756 if ((unlikely(++i >= vq->packed.vring.num))) { 1757 i = 0; 1758 vq->packed.avail_used_flags ^= 1759 1 << VRING_PACKED_DESC_F_AVAIL | 1760 1 << VRING_PACKED_DESC_F_USED; 1761 } 1762 } 1763 } 1764 1765 if (i <= head) 1766 vq->packed.avail_wrap_counter ^= 1; 1767 1768 /* We're using some buffers from the free list. */ 1769 vq->vq.num_free -= descs_used; 1770 1771 /* Update free pointer */ 1772 vq->packed.next_avail_idx = i; 1773 vq->free_head = curr; 1774 1775 /* Store token. */ 1776 vq->packed.desc_state[id].num = descs_used; 1777 vq->packed.desc_state[id].data = data; 1778 vq->packed.desc_state[id].indir_desc = ctx; 1779 vq->packed.desc_state[id].last = prev; 1780 1781 /* 1782 * A driver MUST NOT make the first descriptor in the list 1783 * available before all subsequent descriptors comprising 1784 * the list are made available. 1785 */ 1786 virtio_wmb(vq->weak_barriers); 1787 vq->packed.vring.desc[head].flags = head_flags; 1788 vq->num_added += descs_used; 1789 1790 pr_debug("Added buffer head %i to %p\n", head, vq); 1791 END_USE(vq); 1792 1793 return 0; 1794 1795 unmap_release: 1796 err_idx = i; 1797 i = head; 1798 curr = vq->free_head; 1799 1800 vq->packed.avail_used_flags = avail_used_flags; 1801 1802 for (n = 0; n < total_sg; n++) { 1803 if (i == err_idx) 1804 break; 1805 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1806 curr = vq->packed.desc_extra[curr].next; 1807 i++; 1808 if (i >= vq->packed.vring.num) 1809 i = 0; 1810 } 1811 1812 END_USE(vq); 1813 return -EIO; 1814 } 1815 1816 static inline int virtqueue_add_packed_in_order(struct vring_virtqueue *vq, 1817 struct scatterlist *sgs[], 1818 unsigned int total_sg, 1819 unsigned int out_sgs, 1820 unsigned int in_sgs, 1821 void *data, 1822 void *ctx, 1823 bool premapped, 1824 gfp_t gfp, 1825 unsigned long attr) 1826 { 1827 struct vring_packed_desc *desc; 1828 struct scatterlist *sg; 1829 unsigned int i, n, sg_count, err_idx, total_in_len = 0; 1830 __le16 head_flags, flags; 1831 u16 head, avail_used_flags; 1832 bool avail_wrap_counter; 1833 int err; 1834 1835 START_USE(vq); 1836 1837 BUG_ON(data == NULL); 1838 BUG_ON(ctx && vq->indirect); 1839 1840 if (unlikely(vq->broken)) { 1841 END_USE(vq); 1842 return -EIO; 1843 } 1844 1845 LAST_ADD_TIME_UPDATE(vq); 1846 1847 BUG_ON(total_sg == 0); 1848 1849 if (virtqueue_use_indirect(vq, total_sg)) { 1850 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1851 in_sgs, data, premapped, gfp, 1852 vq->packed.next_avail_idx, 1853 attr); 1854 if (err != -ENOMEM) { 1855 END_USE(vq); 1856 return err; 1857 } 1858 1859 /* fall back on direct */ 1860 } 1861 1862 head = vq->packed.next_avail_idx; 1863 avail_used_flags = vq->packed.avail_used_flags; 1864 avail_wrap_counter = vq->packed.avail_wrap_counter; 1865 1866 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1867 1868 desc = vq->packed.vring.desc; 1869 i = head; 1870 1871 if (unlikely(vq->vq.num_free < total_sg)) { 1872 pr_debug("Can't add buf len %i - avail = %i\n", 1873 total_sg, vq->vq.num_free); 1874 END_USE(vq); 1875 return -ENOSPC; 1876 } 1877 1878 sg_count = 0; 1879 for (n = 0; n < out_sgs + in_sgs; n++) { 1880 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1881 dma_addr_t addr; 1882 u32 len; 1883 1884 flags = 0; 1885 if (++sg_count != total_sg) 1886 flags |= cpu_to_le16(VRING_DESC_F_NEXT); 1887 if (n >= out_sgs) 1888 flags |= cpu_to_le16(VRING_DESC_F_WRITE); 1889 1890 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1891 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1892 &addr, &len, premapped, attr)) 1893 goto unmap_release; 1894 1895 flags |= cpu_to_le16(vq->packed.avail_used_flags); 1896 1897 if (i == head) 1898 head_flags = flags; 1899 else 1900 desc[i].flags = flags; 1901 1902 desc[i].addr = cpu_to_le64(addr); 1903 desc[i].len = cpu_to_le32(len); 1904 desc[i].id = cpu_to_le16(head); 1905 1906 if (unlikely(vq->use_map_api)) { 1907 vq->packed.desc_extra[i].addr = premapped ? 1908 DMA_MAPPING_ERROR : addr; 1909 vq->packed.desc_extra[i].len = len; 1910 vq->packed.desc_extra[i].flags = 1911 le16_to_cpu(flags); 1912 } 1913 1914 if ((unlikely(++i >= vq->packed.vring.num))) { 1915 i = 0; 1916 vq->packed.avail_used_flags ^= 1917 1 << VRING_PACKED_DESC_F_AVAIL | 1918 1 << VRING_PACKED_DESC_F_USED; 1919 vq->packed.avail_wrap_counter ^= 1; 1920 } 1921 1922 if (n >= out_sgs) 1923 total_in_len += len; 1924 } 1925 } 1926 1927 /* We're using some buffers from the free list. */ 1928 vq->vq.num_free -= total_sg; 1929 1930 /* Update free pointer */ 1931 vq->packed.next_avail_idx = i; 1932 1933 /* Store token. */ 1934 vq->packed.desc_state[head].num = total_sg; 1935 vq->packed.desc_state[head].data = data; 1936 vq->packed.desc_state[head].indir_desc = ctx; 1937 vq->packed.desc_state[head].total_in_len = total_in_len; 1938 1939 /* 1940 * A driver MUST NOT make the first descriptor in the list 1941 * available before all subsequent descriptors comprising 1942 * the list are made available. 1943 */ 1944 virtio_wmb(vq->weak_barriers); 1945 vq->packed.vring.desc[head].flags = head_flags; 1946 vq->num_added += total_sg; 1947 1948 pr_debug("Added buffer head %i to %p\n", head, vq); 1949 END_USE(vq); 1950 1951 return 0; 1952 1953 unmap_release: 1954 err_idx = i; 1955 i = head; 1956 vq->packed.avail_used_flags = avail_used_flags; 1957 vq->packed.avail_wrap_counter = avail_wrap_counter; 1958 1959 for (n = 0; n < total_sg; n++) { 1960 if (i == err_idx) 1961 break; 1962 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[i]); 1963 i++; 1964 if (i >= vq->packed.vring.num) 1965 i = 0; 1966 } 1967 1968 END_USE(vq); 1969 return -EIO; 1970 } 1971 1972 static bool virtqueue_kick_prepare_packed(struct vring_virtqueue *vq) 1973 { 1974 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1975 bool needs_kick; 1976 union { 1977 struct { 1978 __le16 off_wrap; 1979 __le16 flags; 1980 }; 1981 u32 u32; 1982 } snapshot; 1983 1984 START_USE(vq); 1985 1986 /* 1987 * We need to expose the new flags value before checking notification 1988 * suppressions. 1989 */ 1990 virtio_mb(vq->weak_barriers); 1991 1992 old = vq->packed.next_avail_idx - vq->num_added; 1993 new = vq->packed.next_avail_idx; 1994 vq->num_added = 0; 1995 1996 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1997 flags = le16_to_cpu(snapshot.flags); 1998 1999 LAST_ADD_TIME_CHECK(vq); 2000 LAST_ADD_TIME_INVALID(vq); 2001 2002 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 2003 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 2004 goto out; 2005 } 2006 2007 off_wrap = le16_to_cpu(snapshot.off_wrap); 2008 2009 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 2010 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 2011 if (wrap_counter != vq->packed.avail_wrap_counter) 2012 event_idx -= vq->packed.vring.num; 2013 2014 needs_kick = vring_need_event(event_idx, new, old); 2015 out: 2016 END_USE(vq); 2017 return needs_kick; 2018 } 2019 2020 static void detach_buf_packed_in_order(struct vring_virtqueue *vq, 2021 unsigned int id, void **ctx) 2022 { 2023 struct vring_desc_state_packed *state = NULL; 2024 struct vring_packed_desc *desc; 2025 unsigned int i, curr; 2026 2027 state = &vq->packed.desc_state[id]; 2028 2029 /* Clear data ptr. */ 2030 state->data = NULL; 2031 2032 vq->vq.num_free += state->num; 2033 2034 if (unlikely(vq->use_map_api)) { 2035 curr = id; 2036 for (i = 0; i < state->num; i++) { 2037 vring_unmap_extra_packed(vq, 2038 &vq->packed.desc_extra[curr]); 2039 curr = vq->packed.desc_extra[curr].next; 2040 } 2041 } 2042 2043 if (vq->indirect) { 2044 struct vring_desc_extra *extra; 2045 u32 len, num; 2046 2047 /* Free the indirect table, if any, now that it's unmapped. */ 2048 desc = state->indir_desc; 2049 if (!desc) 2050 return; 2051 2052 if (vq->use_map_api) { 2053 len = vq->packed.desc_extra[id].len; 2054 num = len / sizeof(struct vring_packed_desc); 2055 2056 extra = (struct vring_desc_extra *)&desc[num]; 2057 2058 for (i = 0; i < num; i++) 2059 vring_unmap_extra_packed(vq, &extra[i]); 2060 } 2061 kfree(desc); 2062 state->indir_desc = NULL; 2063 } else if (ctx) { 2064 *ctx = state->indir_desc; 2065 } 2066 } 2067 2068 static void detach_buf_packed(struct vring_virtqueue *vq, 2069 unsigned int id, void **ctx) 2070 { 2071 struct vring_desc_state_packed *state = &vq->packed.desc_state[id]; 2072 2073 vq->packed.desc_extra[state->last].next = vq->free_head; 2074 vq->free_head = id; 2075 2076 detach_buf_packed_in_order(vq, id, ctx); 2077 } 2078 2079 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 2080 u16 idx, bool used_wrap_counter) 2081 { 2082 u16 flags; 2083 bool avail, used; 2084 2085 flags = vring_read_packed_desc_flags(vq, idx); 2086 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 2087 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 2088 2089 return avail == used && used == used_wrap_counter; 2090 } 2091 2092 static bool virtqueue_poll_packed(const struct vring_virtqueue *vq, 2093 unsigned int off_wrap) 2094 { 2095 bool wrap_counter; 2096 u16 used_idx; 2097 2098 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 2099 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 2100 2101 return is_used_desc_packed(vq, used_idx, wrap_counter); 2102 } 2103 2104 static bool more_used_packed(const struct vring_virtqueue *vq) 2105 { 2106 return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx)); 2107 } 2108 2109 static void update_last_used_idx_packed(struct vring_virtqueue *vq, 2110 u16 id, u16 last_used, 2111 u16 used_wrap_counter) 2112 { 2113 last_used += vq->packed.desc_state[id].num; 2114 if (unlikely(last_used >= vq->packed.vring.num)) { 2115 last_used -= vq->packed.vring.num; 2116 used_wrap_counter ^= 1; 2117 } 2118 2119 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 2120 WRITE_ONCE(vq->last_used_idx, last_used); 2121 2122 /* 2123 * If we expect an interrupt for the next entry, tell host 2124 * by writing event index and flush out the write before 2125 * the read in the next get_buf call. 2126 */ 2127 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 2128 virtio_store_mb(vq->weak_barriers, 2129 &vq->packed.vring.driver->off_wrap, 2130 cpu_to_le16(vq->last_used_idx)); 2131 } 2132 2133 static bool more_used_packed_in_order(const struct vring_virtqueue *vq) 2134 { 2135 if (vq->batch_last.id != UINT_MAX) 2136 return true; 2137 2138 return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx)); 2139 } 2140 2141 static void *virtqueue_get_buf_ctx_packed_in_order(struct vring_virtqueue *vq, 2142 unsigned int *len, 2143 void **ctx) 2144 { 2145 unsigned int num = vq->packed.vring.num; 2146 u16 last_used, last_used_idx; 2147 bool used_wrap_counter; 2148 void *ret; 2149 2150 START_USE(vq); 2151 2152 if (unlikely(vq->broken)) { 2153 END_USE(vq); 2154 return NULL; 2155 } 2156 2157 last_used_idx = vq->last_used_idx; 2158 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 2159 last_used = packed_last_used(last_used_idx); 2160 2161 if (vq->batch_last.id == UINT_MAX) { 2162 if (!more_used_packed_in_order(vq)) { 2163 pr_debug("No more buffers in queue\n"); 2164 END_USE(vq); 2165 return NULL; 2166 } 2167 /* Only get used elements after they have been exposed by host. */ 2168 virtio_rmb(vq->weak_barriers); 2169 vq->batch_last.id = 2170 le16_to_cpu(vq->packed.vring.desc[last_used].id); 2171 vq->batch_last.len = 2172 le32_to_cpu(vq->packed.vring.desc[last_used].len); 2173 } 2174 2175 if (vq->batch_last.id == last_used) { 2176 vq->batch_last.id = UINT_MAX; 2177 *len = vq->batch_last.len; 2178 } else { 2179 *len = vq->packed.desc_state[last_used].total_in_len; 2180 } 2181 2182 if (unlikely(last_used >= num)) { 2183 BAD_RING(vq, "id %u out of range\n", last_used); 2184 return NULL; 2185 } 2186 if (unlikely(!vq->packed.desc_state[last_used].data)) { 2187 BAD_RING(vq, "id %u is not a head!\n", last_used); 2188 return NULL; 2189 } 2190 2191 /* detach_buf_packed clears data, so grab it now. */ 2192 ret = vq->packed.desc_state[last_used].data; 2193 detach_buf_packed_in_order(vq, last_used, ctx); 2194 2195 update_last_used_idx_packed(vq, last_used, last_used, 2196 used_wrap_counter); 2197 2198 LAST_ADD_TIME_INVALID(vq); 2199 2200 END_USE(vq); 2201 return ret; 2202 } 2203 2204 static void *virtqueue_get_buf_ctx_packed(struct vring_virtqueue *vq, 2205 unsigned int *len, 2206 void **ctx) 2207 { 2208 unsigned int num = vq->packed.vring.num; 2209 u16 last_used, id, last_used_idx; 2210 bool used_wrap_counter; 2211 void *ret; 2212 2213 START_USE(vq); 2214 2215 if (unlikely(vq->broken)) { 2216 END_USE(vq); 2217 return NULL; 2218 } 2219 2220 if (!more_used_packed(vq)) { 2221 pr_debug("No more buffers in queue\n"); 2222 END_USE(vq); 2223 return NULL; 2224 } 2225 2226 /* Only get used elements after they have been exposed by host. */ 2227 virtio_rmb(vq->weak_barriers); 2228 2229 last_used_idx = READ_ONCE(vq->last_used_idx); 2230 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 2231 last_used = packed_last_used(last_used_idx); 2232 id = vring_read_packed_desc_id(vq, last_used); 2233 *len = vring_read_packed_desc_len(vq, last_used); 2234 2235 if (unlikely(id >= num)) { 2236 BAD_RING(vq, "id %u out of range\n", id); 2237 return NULL; 2238 } 2239 if (unlikely(!vq->packed.desc_state[id].data)) { 2240 BAD_RING(vq, "id %u is not a head!\n", id); 2241 return NULL; 2242 } 2243 2244 /* detach_buf_packed clears data, so grab it now. */ 2245 ret = vq->packed.desc_state[id].data; 2246 detach_buf_packed(vq, id, ctx); 2247 2248 update_last_used_idx_packed(vq, id, last_used, used_wrap_counter); 2249 2250 LAST_ADD_TIME_INVALID(vq); 2251 2252 END_USE(vq); 2253 return ret; 2254 } 2255 2256 static void virtqueue_disable_cb_packed(struct vring_virtqueue *vq) 2257 { 2258 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 2259 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 2260 2261 /* 2262 * If device triggered an event already it won't trigger one again: 2263 * no need to disable. 2264 */ 2265 if (vq->event_triggered) 2266 return; 2267 2268 vq->packed.vring.driver->flags = 2269 cpu_to_le16(vq->packed.event_flags_shadow); 2270 } 2271 } 2272 2273 static unsigned int virtqueue_enable_cb_prepare_packed(struct vring_virtqueue *vq) 2274 { 2275 START_USE(vq); 2276 2277 /* 2278 * We optimistically turn back on interrupts, then check if there was 2279 * more to do. 2280 */ 2281 2282 if (vq->event) { 2283 vq->packed.vring.driver->off_wrap = 2284 cpu_to_le16(vq->last_used_idx); 2285 /* 2286 * We need to update event offset and event wrap 2287 * counter first before updating event flags. 2288 */ 2289 virtio_wmb(vq->weak_barriers); 2290 } 2291 2292 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 2293 vq->packed.event_flags_shadow = vq->event ? 2294 VRING_PACKED_EVENT_FLAG_DESC : 2295 VRING_PACKED_EVENT_FLAG_ENABLE; 2296 vq->packed.vring.driver->flags = 2297 cpu_to_le16(vq->packed.event_flags_shadow); 2298 } 2299 2300 END_USE(vq); 2301 return vq->last_used_idx; 2302 } 2303 2304 static bool virtqueue_enable_cb_delayed_packed(struct vring_virtqueue *vq) 2305 { 2306 u16 used_idx, wrap_counter, last_used_idx; 2307 u16 bufs; 2308 2309 START_USE(vq); 2310 2311 /* 2312 * We optimistically turn back on interrupts, then check if there was 2313 * more to do. 2314 */ 2315 2316 if (vq->event) { 2317 /* TODO: tune this threshold */ 2318 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 2319 last_used_idx = READ_ONCE(vq->last_used_idx); 2320 wrap_counter = packed_used_wrap_counter(last_used_idx); 2321 2322 used_idx = packed_last_used(last_used_idx) + bufs; 2323 if (used_idx >= vq->packed.vring.num) { 2324 used_idx -= vq->packed.vring.num; 2325 wrap_counter ^= 1; 2326 } 2327 2328 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 2329 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 2330 2331 /* 2332 * We need to update event offset and event wrap 2333 * counter first before updating event flags. 2334 */ 2335 virtio_wmb(vq->weak_barriers); 2336 } 2337 2338 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 2339 vq->packed.event_flags_shadow = vq->event ? 2340 VRING_PACKED_EVENT_FLAG_DESC : 2341 VRING_PACKED_EVENT_FLAG_ENABLE; 2342 vq->packed.vring.driver->flags = 2343 cpu_to_le16(vq->packed.event_flags_shadow); 2344 } 2345 2346 /* 2347 * We need to update event suppression structure first 2348 * before re-checking for more used buffers. 2349 */ 2350 virtio_mb(vq->weak_barriers); 2351 2352 last_used_idx = READ_ONCE(vq->last_used_idx); 2353 wrap_counter = packed_used_wrap_counter(last_used_idx); 2354 used_idx = packed_last_used(last_used_idx); 2355 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 2356 END_USE(vq); 2357 return false; 2358 } 2359 2360 END_USE(vq); 2361 return true; 2362 } 2363 2364 static void *virtqueue_detach_unused_buf_packed(struct vring_virtqueue *vq) 2365 { 2366 unsigned int i; 2367 void *buf; 2368 2369 START_USE(vq); 2370 2371 for (i = 0; i < vq->packed.vring.num; i++) { 2372 if (!vq->packed.desc_state[i].data) 2373 continue; 2374 /* detach_buf clears data, so grab it now. */ 2375 buf = vq->packed.desc_state[i].data; 2376 if (virtqueue_is_in_order(vq)) 2377 detach_buf_packed_in_order(vq, i, NULL); 2378 else 2379 detach_buf_packed(vq, i, NULL); 2380 END_USE(vq); 2381 return buf; 2382 } 2383 /* That should have freed everything. */ 2384 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 2385 2386 END_USE(vq); 2387 return NULL; 2388 } 2389 2390 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) 2391 { 2392 struct vring_desc_extra *desc_extra; 2393 unsigned int i; 2394 2395 desc_extra = kmalloc_objs(struct vring_desc_extra, num); 2396 if (!desc_extra) 2397 return NULL; 2398 2399 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 2400 2401 for (i = 0; i < num - 1; i++) 2402 desc_extra[i].next = i + 1; 2403 2404 desc_extra[num - 1].next = 0; 2405 2406 return desc_extra; 2407 } 2408 2409 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, 2410 struct virtio_device *vdev, 2411 union virtio_map map) 2412 { 2413 if (vring_packed->vring.desc) 2414 vring_free_queue(vdev, vring_packed->ring_size_in_bytes, 2415 vring_packed->vring.desc, 2416 vring_packed->ring_dma_addr, 2417 map); 2418 2419 if (vring_packed->vring.driver) 2420 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 2421 vring_packed->vring.driver, 2422 vring_packed->driver_event_dma_addr, 2423 map); 2424 2425 if (vring_packed->vring.device) 2426 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 2427 vring_packed->vring.device, 2428 vring_packed->device_event_dma_addr, 2429 map); 2430 2431 kfree(vring_packed->desc_state); 2432 kfree(vring_packed->desc_extra); 2433 } 2434 2435 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, 2436 struct virtio_device *vdev, 2437 u32 num, union virtio_map map) 2438 { 2439 struct vring_packed_desc *ring; 2440 struct vring_packed_desc_event *driver, *device; 2441 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 2442 size_t ring_size_in_bytes, event_size_in_bytes; 2443 2444 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 2445 2446 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 2447 &ring_dma_addr, 2448 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2449 map); 2450 if (!ring) 2451 goto err; 2452 2453 vring_packed->vring.desc = ring; 2454 vring_packed->ring_dma_addr = ring_dma_addr; 2455 vring_packed->ring_size_in_bytes = ring_size_in_bytes; 2456 2457 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 2458 2459 driver = vring_alloc_queue(vdev, event_size_in_bytes, 2460 &driver_event_dma_addr, 2461 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2462 map); 2463 if (!driver) 2464 goto err; 2465 2466 vring_packed->vring.driver = driver; 2467 vring_packed->event_size_in_bytes = event_size_in_bytes; 2468 vring_packed->driver_event_dma_addr = driver_event_dma_addr; 2469 2470 device = vring_alloc_queue(vdev, event_size_in_bytes, 2471 &device_event_dma_addr, 2472 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2473 map); 2474 if (!device) 2475 goto err; 2476 2477 vring_packed->vring.device = device; 2478 vring_packed->device_event_dma_addr = device_event_dma_addr; 2479 2480 vring_packed->vring.num = num; 2481 2482 return 0; 2483 2484 err: 2485 vring_free_packed(vring_packed, vdev, map); 2486 return -ENOMEM; 2487 } 2488 2489 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) 2490 { 2491 struct vring_desc_state_packed *state; 2492 struct vring_desc_extra *extra; 2493 u32 num = vring_packed->vring.num; 2494 2495 state = kmalloc_objs(struct vring_desc_state_packed, num); 2496 if (!state) 2497 goto err_desc_state; 2498 2499 memset(state, 0, num * sizeof(struct vring_desc_state_packed)); 2500 2501 extra = vring_alloc_desc_extra(num); 2502 if (!extra) 2503 goto err_desc_extra; 2504 2505 vring_packed->desc_state = state; 2506 vring_packed->desc_extra = extra; 2507 2508 return 0; 2509 2510 err_desc_extra: 2511 kfree(state); 2512 err_desc_state: 2513 return -ENOMEM; 2514 } 2515 2516 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, 2517 bool callback) 2518 { 2519 vring_packed->next_avail_idx = 0; 2520 vring_packed->avail_wrap_counter = 1; 2521 vring_packed->event_flags_shadow = 0; 2522 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 2523 2524 /* No callback? Tell other side not to bother us. */ 2525 if (!callback) { 2526 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 2527 vring_packed->vring.driver->flags = 2528 cpu_to_le16(vring_packed->event_flags_shadow); 2529 } 2530 } 2531 2532 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, 2533 struct vring_virtqueue_packed *vring_packed) 2534 { 2535 vq->packed = *vring_packed; 2536 2537 if (virtqueue_is_in_order(vq)) { 2538 vq->batch_last.id = UINT_MAX; 2539 } else { 2540 /* 2541 * Put everything in free lists. Note that 2542 * next_avail_idx is sufficient with IN_ORDER so 2543 * free_head is unused. 2544 */ 2545 vq->free_head = 0; 2546 } 2547 } 2548 static void virtqueue_reset_packed(struct vring_virtqueue *vq) 2549 { 2550 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); 2551 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); 2552 2553 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */ 2554 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); 2555 virtqueue_init(vq, vq->packed.vring.num); 2556 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); 2557 } 2558 2559 static const struct virtqueue_ops packed_ops; 2560 2561 static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, 2562 struct vring_virtqueue_packed *vring_packed, 2563 struct virtio_device *vdev, 2564 bool weak_barriers, 2565 bool context, 2566 bool (*notify)(struct virtqueue *), 2567 void (*callback)(struct virtqueue *), 2568 const char *name, 2569 union virtio_map map) 2570 { 2571 struct vring_virtqueue *vq; 2572 int err; 2573 2574 vq = kmalloc_obj(*vq); 2575 if (!vq) 2576 return NULL; 2577 2578 vq->vq.callback = callback; 2579 vq->vq.vdev = vdev; 2580 vq->vq.name = name; 2581 vq->vq.index = index; 2582 vq->vq.reset = false; 2583 vq->we_own_ring = false; 2584 vq->notify = notify; 2585 vq->weak_barriers = weak_barriers; 2586 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2587 vq->broken = true; 2588 #else 2589 vq->broken = false; 2590 #endif 2591 vq->map = map; 2592 vq->use_map_api = vring_use_map_api(vdev); 2593 2594 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2595 !context; 2596 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2597 vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ? 2598 VQ_LAYOUT_PACKED_IN_ORDER : VQ_LAYOUT_PACKED; 2599 2600 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2601 vq->weak_barriers = false; 2602 2603 err = vring_alloc_state_extra_packed(vring_packed); 2604 if (err) { 2605 kfree(vq); 2606 return NULL; 2607 } 2608 2609 virtqueue_vring_init_packed(vring_packed, !!callback); 2610 2611 virtqueue_init(vq, vring_packed->vring.num); 2612 virtqueue_vring_attach_packed(vq, vring_packed); 2613 2614 spin_lock(&vdev->vqs_list_lock); 2615 list_add_tail(&vq->vq.list, &vdev->vqs); 2616 spin_unlock(&vdev->vqs_list_lock); 2617 return &vq->vq; 2618 } 2619 2620 static struct virtqueue *vring_create_virtqueue_packed( 2621 unsigned int index, 2622 unsigned int num, 2623 unsigned int vring_align, 2624 struct virtio_device *vdev, 2625 bool weak_barriers, 2626 bool may_reduce_num, 2627 bool context, 2628 bool (*notify)(struct virtqueue *), 2629 void (*callback)(struct virtqueue *), 2630 const char *name, 2631 union virtio_map map) 2632 { 2633 struct vring_virtqueue_packed vring_packed = {}; 2634 struct virtqueue *vq; 2635 2636 if (vring_alloc_queue_packed(&vring_packed, vdev, num, map)) 2637 return NULL; 2638 2639 vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, 2640 context, notify, callback, name, map); 2641 if (!vq) { 2642 vring_free_packed(&vring_packed, vdev, map); 2643 return NULL; 2644 } 2645 2646 to_vvq(vq)->we_own_ring = true; 2647 2648 return vq; 2649 } 2650 2651 static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num) 2652 { 2653 struct vring_virtqueue_packed vring_packed = {}; 2654 struct virtio_device *vdev = vq->vq.vdev; 2655 int err; 2656 2657 if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map)) 2658 goto err_ring; 2659 2660 err = vring_alloc_state_extra_packed(&vring_packed); 2661 if (err) 2662 goto err_state_extra; 2663 2664 vring_free(&vq->vq); 2665 2666 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); 2667 2668 virtqueue_init(vq, vring_packed.vring.num); 2669 virtqueue_vring_attach_packed(vq, &vring_packed); 2670 2671 return 0; 2672 2673 err_state_extra: 2674 vring_free_packed(&vring_packed, vdev, vq->map); 2675 err_ring: 2676 virtqueue_reset_packed(vq); 2677 return -ENOMEM; 2678 } 2679 2680 static const struct virtqueue_ops split_ops = { 2681 .add = virtqueue_add_split, 2682 .get = virtqueue_get_buf_ctx_split, 2683 .kick_prepare = virtqueue_kick_prepare_split, 2684 .disable_cb = virtqueue_disable_cb_split, 2685 .enable_cb_delayed = virtqueue_enable_cb_delayed_split, 2686 .enable_cb_prepare = virtqueue_enable_cb_prepare_split, 2687 .poll = virtqueue_poll_split, 2688 .detach_unused_buf = virtqueue_detach_unused_buf_split, 2689 .more_used = more_used_split, 2690 .resize = virtqueue_resize_split, 2691 .reset = virtqueue_reset_split, 2692 }; 2693 2694 static const struct virtqueue_ops packed_ops = { 2695 .add = virtqueue_add_packed, 2696 .get = virtqueue_get_buf_ctx_packed, 2697 .kick_prepare = virtqueue_kick_prepare_packed, 2698 .disable_cb = virtqueue_disable_cb_packed, 2699 .enable_cb_delayed = virtqueue_enable_cb_delayed_packed, 2700 .enable_cb_prepare = virtqueue_enable_cb_prepare_packed, 2701 .poll = virtqueue_poll_packed, 2702 .detach_unused_buf = virtqueue_detach_unused_buf_packed, 2703 .more_used = more_used_packed, 2704 .resize = virtqueue_resize_packed, 2705 .reset = virtqueue_reset_packed, 2706 }; 2707 2708 static const struct virtqueue_ops split_in_order_ops = { 2709 .add = virtqueue_add_split, 2710 .get = virtqueue_get_buf_ctx_split_in_order, 2711 .kick_prepare = virtqueue_kick_prepare_split, 2712 .disable_cb = virtqueue_disable_cb_split, 2713 .enable_cb_delayed = virtqueue_enable_cb_delayed_split, 2714 .enable_cb_prepare = virtqueue_enable_cb_prepare_split, 2715 .poll = virtqueue_poll_split, 2716 .detach_unused_buf = virtqueue_detach_unused_buf_split, 2717 .more_used = more_used_split_in_order, 2718 .resize = virtqueue_resize_split, 2719 .reset = virtqueue_reset_split, 2720 }; 2721 2722 static const struct virtqueue_ops packed_in_order_ops = { 2723 .add = virtqueue_add_packed_in_order, 2724 .get = virtqueue_get_buf_ctx_packed_in_order, 2725 .kick_prepare = virtqueue_kick_prepare_packed, 2726 .disable_cb = virtqueue_disable_cb_packed, 2727 .enable_cb_delayed = virtqueue_enable_cb_delayed_packed, 2728 .enable_cb_prepare = virtqueue_enable_cb_prepare_packed, 2729 .poll = virtqueue_poll_packed, 2730 .detach_unused_buf = virtqueue_detach_unused_buf_packed, 2731 .more_used = more_used_packed_in_order, 2732 .resize = virtqueue_resize_packed, 2733 .reset = virtqueue_reset_packed, 2734 }; 2735 2736 static int virtqueue_disable_and_recycle(struct virtqueue *_vq, 2737 void (*recycle)(struct virtqueue *vq, void *buf)) 2738 { 2739 struct vring_virtqueue *vq = to_vvq(_vq); 2740 struct virtio_device *vdev = vq->vq.vdev; 2741 void *buf; 2742 int err; 2743 2744 if (!vq->we_own_ring) 2745 return -EPERM; 2746 2747 if (!vdev->config->disable_vq_and_reset) 2748 return -ENOENT; 2749 2750 if (!vdev->config->enable_vq_after_reset) 2751 return -ENOENT; 2752 2753 err = vdev->config->disable_vq_and_reset(_vq); 2754 if (err) 2755 return err; 2756 2757 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) 2758 recycle(_vq, buf); 2759 2760 return 0; 2761 } 2762 2763 static int virtqueue_enable_after_reset(struct virtqueue *_vq) 2764 { 2765 struct vring_virtqueue *vq = to_vvq(_vq); 2766 struct virtio_device *vdev = vq->vq.vdev; 2767 2768 if (vdev->config->enable_vq_after_reset(_vq)) 2769 return -EBUSY; 2770 2771 return 0; 2772 } 2773 2774 /* 2775 * Generic functions and exported symbols. 2776 */ 2777 2778 #define VIRTQUEUE_CALL(vq, op, ...) \ 2779 ({ \ 2780 typeof(vq) __VIRTQUEUE_CALL_vq = (vq); \ 2781 typeof(split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__)) ret; \ 2782 \ 2783 switch (__VIRTQUEUE_CALL_vq->layout) { \ 2784 case VQ_LAYOUT_SPLIT: \ 2785 ret = split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \ 2786 break; \ 2787 case VQ_LAYOUT_PACKED: \ 2788 ret = packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);\ 2789 break; \ 2790 case VQ_LAYOUT_SPLIT_IN_ORDER: \ 2791 ret = split_in_order_ops.op(vq, ##__VA_ARGS__); \ 2792 break; \ 2793 case VQ_LAYOUT_PACKED_IN_ORDER: \ 2794 ret = packed_in_order_ops.op(vq, ##__VA_ARGS__); \ 2795 break; \ 2796 default: \ 2797 BUG(); \ 2798 break; \ 2799 } \ 2800 ret; \ 2801 }) 2802 2803 #define VOID_VIRTQUEUE_CALL(vq, op, ...) \ 2804 ({ \ 2805 typeof(vq) __VIRTQUEUE_CALL_vq = (vq); \ 2806 \ 2807 switch (__VIRTQUEUE_CALL_vq->layout) { \ 2808 case VQ_LAYOUT_SPLIT: \ 2809 split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \ 2810 break; \ 2811 case VQ_LAYOUT_PACKED: \ 2812 packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \ 2813 break; \ 2814 case VQ_LAYOUT_SPLIT_IN_ORDER: \ 2815 split_in_order_ops.op(vq, ##__VA_ARGS__); \ 2816 break; \ 2817 case VQ_LAYOUT_PACKED_IN_ORDER: \ 2818 packed_in_order_ops.op(vq, ##__VA_ARGS__); \ 2819 break; \ 2820 default: \ 2821 BUG(); \ 2822 break; \ 2823 } \ 2824 }) 2825 2826 static inline int virtqueue_add(struct virtqueue *_vq, 2827 struct scatterlist *sgs[], 2828 unsigned int total_sg, 2829 unsigned int out_sgs, 2830 unsigned int in_sgs, 2831 void *data, 2832 void *ctx, 2833 bool premapped, 2834 gfp_t gfp, 2835 unsigned long attr) 2836 { 2837 struct vring_virtqueue *vq = to_vvq(_vq); 2838 2839 return VIRTQUEUE_CALL(vq, add, sgs, total_sg, 2840 out_sgs, in_sgs, data, 2841 ctx, premapped, gfp, attr); 2842 } 2843 2844 /** 2845 * virtqueue_add_sgs - expose buffers to other end 2846 * @_vq: the struct virtqueue we're talking about. 2847 * @sgs: array of terminated scatterlists. 2848 * @out_sgs: the number of scatterlists readable by other side 2849 * @in_sgs: the number of scatterlists which are writable (after readable ones) 2850 * @data: the token identifying the buffer. 2851 * @gfp: how to do memory allocations (if necessary). 2852 * 2853 * Caller must ensure we don't call this with other virtqueue operations 2854 * at the same time (except where noted). 2855 * 2856 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2857 * 2858 * NB: ENOSPC is a special code that is only returned on an attempt to add a 2859 * buffer to a full VQ. It indicates that some buffers are outstanding and that 2860 * the operation can be retried after some buffers have been used. 2861 */ 2862 int virtqueue_add_sgs(struct virtqueue *_vq, 2863 struct scatterlist *sgs[], 2864 unsigned int out_sgs, 2865 unsigned int in_sgs, 2866 void *data, 2867 gfp_t gfp) 2868 { 2869 unsigned int i, total_sg = 0; 2870 2871 /* Count them first. */ 2872 for (i = 0; i < out_sgs + in_sgs; i++) { 2873 struct scatterlist *sg; 2874 2875 for (sg = sgs[i]; sg; sg = sg_next(sg)) 2876 total_sg++; 2877 } 2878 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 2879 data, NULL, false, gfp, 0); 2880 } 2881 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 2882 2883 /** 2884 * virtqueue_add_outbuf - expose output buffers to other end 2885 * @vq: the struct virtqueue we're talking about. 2886 * @sg: scatterlist (must be well-formed and terminated!) 2887 * @num: the number of entries in @sg readable by other side 2888 * @data: the token identifying the buffer. 2889 * @gfp: how to do memory allocations (if necessary). 2890 * 2891 * Caller must ensure we don't call this with other virtqueue operations 2892 * at the same time (except where noted). 2893 * 2894 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2895 */ 2896 int virtqueue_add_outbuf(struct virtqueue *vq, 2897 struct scatterlist *sg, unsigned int num, 2898 void *data, 2899 gfp_t gfp) 2900 { 2901 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp, 0); 2902 } 2903 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 2904 2905 /** 2906 * virtqueue_add_outbuf_premapped - expose output buffers to other end 2907 * @vq: the struct virtqueue we're talking about. 2908 * @sg: scatterlist (must be well-formed and terminated!) 2909 * @num: the number of entries in @sg readable by other side 2910 * @data: the token identifying the buffer. 2911 * @gfp: how to do memory allocations (if necessary). 2912 * 2913 * Caller must ensure we don't call this with other virtqueue operations 2914 * at the same time (except where noted). 2915 * 2916 * Return: 2917 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2918 */ 2919 int virtqueue_add_outbuf_premapped(struct virtqueue *vq, 2920 struct scatterlist *sg, unsigned int num, 2921 void *data, 2922 gfp_t gfp) 2923 { 2924 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp, 0); 2925 } 2926 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped); 2927 2928 /** 2929 * virtqueue_add_inbuf - expose input buffers to other end 2930 * @vq: the struct virtqueue we're talking about. 2931 * @sg: scatterlist (must be well-formed and terminated!) 2932 * @num: the number of entries in @sg writable by other side 2933 * @data: the token identifying the buffer. 2934 * @gfp: how to do memory allocations (if necessary). 2935 * 2936 * Caller must ensure we don't call this with other virtqueue operations 2937 * at the same time (except where noted). 2938 * 2939 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2940 */ 2941 int virtqueue_add_inbuf(struct virtqueue *vq, 2942 struct scatterlist *sg, unsigned int num, 2943 void *data, 2944 gfp_t gfp) 2945 { 2946 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 0); 2947 } 2948 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 2949 2950 /** 2951 * virtqueue_add_inbuf_cache_clean - expose input buffers with cache clean 2952 * @vq: the struct virtqueue we're talking about. 2953 * @sg: scatterlist (must be well-formed and terminated!) 2954 * @num: the number of entries in @sg writable by other side 2955 * @data: the token identifying the buffer. 2956 * @gfp: how to do memory allocations (if necessary). 2957 * 2958 * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES 2959 * to indicate that the CPU will not dirty any cacheline overlapping this buffer 2960 * while it is available, and to suppress overlapping cacheline warnings in DMA 2961 * debug builds. 2962 * 2963 * Caller must ensure we don't call this with other virtqueue operations 2964 * at the same time (except where noted). 2965 * 2966 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2967 */ 2968 int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq, 2969 struct scatterlist *sg, unsigned int num, 2970 void *data, 2971 gfp_t gfp) 2972 { 2973 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 2974 DMA_ATTR_DEBUGGING_IGNORE_CACHELINES); 2975 } 2976 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean); 2977 2978 /** 2979 * virtqueue_add_inbuf_ctx - expose input buffers to other end 2980 * @vq: the struct virtqueue we're talking about. 2981 * @sg: scatterlist (must be well-formed and terminated!) 2982 * @num: the number of entries in @sg writable by other side 2983 * @data: the token identifying the buffer. 2984 * @ctx: extra context for the token 2985 * @gfp: how to do memory allocations (if necessary). 2986 * 2987 * Caller must ensure we don't call this with other virtqueue operations 2988 * at the same time (except where noted). 2989 * 2990 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2991 */ 2992 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 2993 struct scatterlist *sg, unsigned int num, 2994 void *data, 2995 void *ctx, 2996 gfp_t gfp) 2997 { 2998 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp, 0); 2999 } 3000 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 3001 3002 /** 3003 * virtqueue_add_inbuf_premapped - expose input buffers to other end 3004 * @vq: the struct virtqueue we're talking about. 3005 * @sg: scatterlist (must be well-formed and terminated!) 3006 * @num: the number of entries in @sg writable by other side 3007 * @data: the token identifying the buffer. 3008 * @ctx: extra context for the token 3009 * @gfp: how to do memory allocations (if necessary). 3010 * 3011 * Caller must ensure we don't call this with other virtqueue operations 3012 * at the same time (except where noted). 3013 * 3014 * Return: 3015 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 3016 */ 3017 int virtqueue_add_inbuf_premapped(struct virtqueue *vq, 3018 struct scatterlist *sg, unsigned int num, 3019 void *data, 3020 void *ctx, 3021 gfp_t gfp) 3022 { 3023 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp, 0); 3024 } 3025 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped); 3026 3027 /** 3028 * virtqueue_dma_dev - get the dma dev 3029 * @_vq: the struct virtqueue we're talking about. 3030 * 3031 * Returns the dma dev. That can been used for dma api. 3032 */ 3033 struct device *virtqueue_dma_dev(struct virtqueue *_vq) 3034 { 3035 struct vring_virtqueue *vq = to_vvq(_vq); 3036 3037 if (vq->use_map_api && !_vq->vdev->map) 3038 return vq->map.dma_dev; 3039 else 3040 return NULL; 3041 } 3042 EXPORT_SYMBOL_GPL(virtqueue_dma_dev); 3043 3044 /** 3045 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 3046 * @_vq: the struct virtqueue 3047 * 3048 * Instead of virtqueue_kick(), you can do: 3049 * if (virtqueue_kick_prepare(vq)) 3050 * virtqueue_notify(vq); 3051 * 3052 * This is sometimes useful because the virtqueue_kick_prepare() needs 3053 * to be serialized, but the actual virtqueue_notify() call does not. 3054 */ 3055 bool virtqueue_kick_prepare(struct virtqueue *_vq) 3056 { 3057 struct vring_virtqueue *vq = to_vvq(_vq); 3058 3059 return VIRTQUEUE_CALL(vq, kick_prepare); 3060 } 3061 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 3062 3063 /** 3064 * virtqueue_notify - second half of split virtqueue_kick call. 3065 * @_vq: the struct virtqueue 3066 * 3067 * This does not need to be serialized. 3068 * 3069 * Returns false if host notify failed or queue is broken, otherwise true. 3070 */ 3071 bool virtqueue_notify(struct virtqueue *_vq) 3072 { 3073 struct vring_virtqueue *vq = to_vvq(_vq); 3074 3075 if (unlikely(vq->broken)) 3076 return false; 3077 3078 /* Prod other side to tell it about changes. */ 3079 if (!vq->notify(_vq)) { 3080 vq->broken = true; 3081 return false; 3082 } 3083 return true; 3084 } 3085 EXPORT_SYMBOL_GPL(virtqueue_notify); 3086 3087 /** 3088 * virtqueue_kick - update after add_buf 3089 * @vq: the struct virtqueue 3090 * 3091 * After one or more virtqueue_add_* calls, invoke this to kick 3092 * the other side. 3093 * 3094 * Caller must ensure we don't call this with other virtqueue 3095 * operations at the same time (except where noted). 3096 * 3097 * Returns false if kick failed, otherwise true. 3098 */ 3099 bool virtqueue_kick(struct virtqueue *vq) 3100 { 3101 if (virtqueue_kick_prepare(vq)) 3102 return virtqueue_notify(vq); 3103 return true; 3104 } 3105 EXPORT_SYMBOL_GPL(virtqueue_kick); 3106 3107 /** 3108 * virtqueue_get_buf_ctx - get the next used buffer 3109 * @_vq: the struct virtqueue we're talking about. 3110 * @len: the length written into the buffer 3111 * @ctx: extra context for the token 3112 * 3113 * If the device wrote data into the buffer, @len will be set to the 3114 * amount written. This means you don't need to clear the buffer 3115 * beforehand to ensure there's no data leakage in the case of short 3116 * writes. 3117 * 3118 * Caller must ensure we don't call this with other virtqueue 3119 * operations at the same time (except where noted). 3120 * 3121 * Returns NULL if there are no used buffers, or the "data" token 3122 * handed to virtqueue_add_*(). 3123 */ 3124 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 3125 void **ctx) 3126 { 3127 struct vring_virtqueue *vq = to_vvq(_vq); 3128 3129 return VIRTQUEUE_CALL(vq, get, len, ctx); 3130 } 3131 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 3132 3133 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 3134 { 3135 return virtqueue_get_buf_ctx(_vq, len, NULL); 3136 } 3137 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 3138 /** 3139 * virtqueue_disable_cb - disable callbacks 3140 * @_vq: the struct virtqueue we're talking about. 3141 * 3142 * Note that this is not necessarily synchronous, hence unreliable and only 3143 * useful as an optimization. 3144 * 3145 * Unlike other operations, this need not be serialized. 3146 */ 3147 void virtqueue_disable_cb(struct virtqueue *_vq) 3148 { 3149 struct vring_virtqueue *vq = to_vvq(_vq); 3150 3151 VOID_VIRTQUEUE_CALL(vq, disable_cb); 3152 } 3153 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 3154 3155 /** 3156 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 3157 * @_vq: the struct virtqueue we're talking about. 3158 * 3159 * This re-enables callbacks; it returns current queue state 3160 * in an opaque unsigned value. This value should be later tested by 3161 * virtqueue_poll, to detect a possible race between the driver checking for 3162 * more work, and enabling callbacks. 3163 * 3164 * Caller must ensure we don't call this with other virtqueue 3165 * operations at the same time (except where noted). 3166 */ 3167 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 3168 { 3169 struct vring_virtqueue *vq = to_vvq(_vq); 3170 3171 if (vq->event_triggered) 3172 vq->event_triggered = false; 3173 3174 return VIRTQUEUE_CALL(vq, enable_cb_prepare); 3175 } 3176 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 3177 3178 /** 3179 * virtqueue_poll - query pending used buffers 3180 * @_vq: the struct virtqueue we're talking about. 3181 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 3182 * 3183 * Returns "true" if there are pending used buffers in the queue. 3184 * 3185 * This does not need to be serialized. 3186 */ 3187 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 3188 { 3189 struct vring_virtqueue *vq = to_vvq(_vq); 3190 3191 if (unlikely(vq->broken)) 3192 return false; 3193 3194 virtio_mb(vq->weak_barriers); 3195 3196 return VIRTQUEUE_CALL(vq, poll, last_used_idx); 3197 } 3198 EXPORT_SYMBOL_GPL(virtqueue_poll); 3199 3200 /** 3201 * virtqueue_enable_cb - restart callbacks after disable_cb. 3202 * @_vq: the struct virtqueue we're talking about. 3203 * 3204 * This re-enables callbacks; it returns "false" if there are pending 3205 * buffers in the queue, to detect a possible race between the driver 3206 * checking for more work, and enabling callbacks. 3207 * 3208 * Caller must ensure we don't call this with other virtqueue 3209 * operations at the same time (except where noted). 3210 */ 3211 bool virtqueue_enable_cb(struct virtqueue *_vq) 3212 { 3213 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 3214 3215 return !virtqueue_poll(_vq, last_used_idx); 3216 } 3217 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 3218 3219 /** 3220 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 3221 * @_vq: the struct virtqueue we're talking about. 3222 * 3223 * This re-enables callbacks but hints to the other side to delay 3224 * interrupts until most of the available buffers have been processed; 3225 * it returns "false" if there are many pending buffers in the queue, 3226 * to detect a possible race between the driver checking for more work, 3227 * and enabling callbacks. 3228 * 3229 * Caller must ensure we don't call this with other virtqueue 3230 * operations at the same time (except where noted). 3231 */ 3232 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 3233 { 3234 struct vring_virtqueue *vq = to_vvq(_vq); 3235 3236 if (vq->event_triggered) 3237 data_race(vq->event_triggered = false); 3238 3239 return VIRTQUEUE_CALL(vq, enable_cb_delayed); 3240 } 3241 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 3242 3243 /** 3244 * virtqueue_detach_unused_buf - detach first unused buffer 3245 * @_vq: the struct virtqueue we're talking about. 3246 * 3247 * Returns NULL or the "data" token handed to virtqueue_add_*(). 3248 * This is not valid on an active queue; it is useful for device 3249 * shutdown or the reset queue. 3250 */ 3251 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 3252 { 3253 struct vring_virtqueue *vq = to_vvq(_vq); 3254 3255 return VIRTQUEUE_CALL(vq, detach_unused_buf); 3256 } 3257 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 3258 3259 static inline bool more_used(const struct vring_virtqueue *vq) 3260 { 3261 return VIRTQUEUE_CALL(vq, more_used); 3262 } 3263 3264 /** 3265 * vring_interrupt - notify a virtqueue on an interrupt 3266 * @irq: the IRQ number (ignored) 3267 * @_vq: the struct virtqueue to notify 3268 * 3269 * Calls the callback function of @_vq to process the virtqueue 3270 * notification. 3271 */ 3272 irqreturn_t vring_interrupt(int irq, void *_vq) 3273 { 3274 struct vring_virtqueue *vq = to_vvq(_vq); 3275 3276 if (!more_used(vq)) { 3277 pr_debug("virtqueue interrupt with no work for %p\n", vq); 3278 return IRQ_NONE; 3279 } 3280 3281 if (unlikely(vq->broken)) { 3282 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 3283 dev_warn_once(&vq->vq.vdev->dev, 3284 "virtio vring IRQ raised before DRIVER_OK"); 3285 return IRQ_NONE; 3286 #else 3287 return IRQ_HANDLED; 3288 #endif 3289 } 3290 3291 /* Just a hint for performance: so it's ok that this can be racy! */ 3292 if (vq->event) 3293 data_race(vq->event_triggered = true); 3294 3295 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 3296 if (vq->vq.callback) 3297 vq->vq.callback(&vq->vq); 3298 3299 return IRQ_HANDLED; 3300 } 3301 EXPORT_SYMBOL_GPL(vring_interrupt); 3302 3303 struct virtqueue *vring_create_virtqueue( 3304 unsigned int index, 3305 unsigned int num, 3306 unsigned int vring_align, 3307 struct virtio_device *vdev, 3308 bool weak_barriers, 3309 bool may_reduce_num, 3310 bool context, 3311 bool (*notify)(struct virtqueue *), 3312 void (*callback)(struct virtqueue *), 3313 const char *name) 3314 { 3315 union virtio_map map = {.dma_dev = vdev->dev.parent}; 3316 3317 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 3318 return vring_create_virtqueue_packed(index, num, vring_align, 3319 vdev, weak_barriers, may_reduce_num, 3320 context, notify, callback, name, map); 3321 3322 return vring_create_virtqueue_split(index, num, vring_align, 3323 vdev, weak_barriers, may_reduce_num, 3324 context, notify, callback, name, map); 3325 } 3326 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 3327 3328 struct virtqueue *vring_create_virtqueue_map( 3329 unsigned int index, 3330 unsigned int num, 3331 unsigned int vring_align, 3332 struct virtio_device *vdev, 3333 bool weak_barriers, 3334 bool may_reduce_num, 3335 bool context, 3336 bool (*notify)(struct virtqueue *), 3337 void (*callback)(struct virtqueue *), 3338 const char *name, 3339 union virtio_map map) 3340 { 3341 3342 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 3343 return vring_create_virtqueue_packed(index, num, vring_align, 3344 vdev, weak_barriers, may_reduce_num, 3345 context, notify, callback, name, map); 3346 3347 return vring_create_virtqueue_split(index, num, vring_align, 3348 vdev, weak_barriers, may_reduce_num, 3349 context, notify, callback, name, map); 3350 } 3351 EXPORT_SYMBOL_GPL(vring_create_virtqueue_map); 3352 3353 /** 3354 * virtqueue_resize - resize the vring of vq 3355 * @_vq: the struct virtqueue we're talking about. 3356 * @num: new ring num 3357 * @recycle: callback to recycle unused buffers 3358 * @recycle_done: callback to be invoked when recycle for all unused buffers done 3359 * 3360 * When it is really necessary to create a new vring, it will set the current vq 3361 * into the reset state. Then call the passed callback to recycle the buffer 3362 * that is no longer used. Only after the new vring is successfully created, the 3363 * old vring will be released. 3364 * 3365 * Caller must ensure we don't call this with other virtqueue operations 3366 * at the same time (except where noted). 3367 * 3368 * Returns zero or a negative error. 3369 * 0: success. 3370 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. 3371 * vq can still work normally 3372 * -EBUSY: Failed to sync with device, vq may not work properly 3373 * -ENOENT: Transport or device not supported 3374 * -E2BIG/-EINVAL: num error 3375 * -EPERM: Operation not permitted 3376 * 3377 */ 3378 int virtqueue_resize(struct virtqueue *_vq, u32 num, 3379 void (*recycle)(struct virtqueue *vq, void *buf), 3380 void (*recycle_done)(struct virtqueue *vq)) 3381 { 3382 struct vring_virtqueue *vq = to_vvq(_vq); 3383 int err, err_reset; 3384 3385 if (num > vq->vq.num_max) 3386 return -E2BIG; 3387 3388 if (!num) 3389 return -EINVAL; 3390 3391 if (virtqueue_get_vring_size(_vq) == num) 3392 return 0; 3393 3394 err = virtqueue_disable_and_recycle(_vq, recycle); 3395 if (err) 3396 return err; 3397 if (recycle_done) 3398 recycle_done(_vq); 3399 3400 err = VIRTQUEUE_CALL(vq, resize, num); 3401 3402 err_reset = virtqueue_enable_after_reset(_vq); 3403 if (err_reset) 3404 return err_reset; 3405 3406 return err; 3407 } 3408 EXPORT_SYMBOL_GPL(virtqueue_resize); 3409 3410 /** 3411 * virtqueue_reset - detach and recycle all unused buffers 3412 * @_vq: the struct virtqueue we're talking about. 3413 * @recycle: callback to recycle unused buffers 3414 * @recycle_done: callback to be invoked when recycle for all unused buffers done 3415 * 3416 * Caller must ensure we don't call this with other virtqueue operations 3417 * at the same time (except where noted). 3418 * 3419 * Returns zero or a negative error. 3420 * 0: success. 3421 * -EBUSY: Failed to sync with device, vq may not work properly 3422 * -ENOENT: Transport or device not supported 3423 * -EPERM: Operation not permitted 3424 */ 3425 int virtqueue_reset(struct virtqueue *_vq, 3426 void (*recycle)(struct virtqueue *vq, void *buf), 3427 void (*recycle_done)(struct virtqueue *vq)) 3428 { 3429 struct vring_virtqueue *vq = to_vvq(_vq); 3430 int err; 3431 3432 err = virtqueue_disable_and_recycle(_vq, recycle); 3433 if (err) 3434 return err; 3435 if (recycle_done) 3436 recycle_done(_vq); 3437 3438 VOID_VIRTQUEUE_CALL(vq, reset); 3439 3440 return virtqueue_enable_after_reset(_vq); 3441 } 3442 EXPORT_SYMBOL_GPL(virtqueue_reset); 3443 3444 struct virtqueue *vring_new_virtqueue(unsigned int index, 3445 unsigned int num, 3446 unsigned int vring_align, 3447 struct virtio_device *vdev, 3448 bool weak_barriers, 3449 bool context, 3450 void *pages, 3451 bool (*notify)(struct virtqueue *vq), 3452 void (*callback)(struct virtqueue *vq), 3453 const char *name) 3454 { 3455 struct vring_virtqueue_split vring_split = {}; 3456 union virtio_map map = {.dma_dev = vdev->dev.parent}; 3457 3458 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3459 struct vring_virtqueue_packed vring_packed = {}; 3460 3461 vring_packed.vring.num = num; 3462 vring_packed.vring.desc = pages; 3463 return __vring_new_virtqueue_packed(index, &vring_packed, 3464 vdev, weak_barriers, 3465 context, notify, callback, 3466 name, map); 3467 } 3468 3469 vring_init(&vring_split.vring, num, pages, vring_align); 3470 return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, 3471 context, notify, callback, name, 3472 map); 3473 } 3474 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 3475 3476 static void vring_free(struct virtqueue *_vq) 3477 { 3478 struct vring_virtqueue *vq = to_vvq(_vq); 3479 3480 if (vq->we_own_ring) { 3481 if (virtqueue_is_packed(vq)) { 3482 vring_free_queue(vq->vq.vdev, 3483 vq->packed.ring_size_in_bytes, 3484 vq->packed.vring.desc, 3485 vq->packed.ring_dma_addr, 3486 vq->map); 3487 3488 vring_free_queue(vq->vq.vdev, 3489 vq->packed.event_size_in_bytes, 3490 vq->packed.vring.driver, 3491 vq->packed.driver_event_dma_addr, 3492 vq->map); 3493 3494 vring_free_queue(vq->vq.vdev, 3495 vq->packed.event_size_in_bytes, 3496 vq->packed.vring.device, 3497 vq->packed.device_event_dma_addr, 3498 vq->map); 3499 3500 kfree(vq->packed.desc_state); 3501 kfree(vq->packed.desc_extra); 3502 } else { 3503 vring_free_queue(vq->vq.vdev, 3504 vq->split.queue_size_in_bytes, 3505 vq->split.vring.desc, 3506 vq->split.queue_dma_addr, 3507 vq->map); 3508 } 3509 } 3510 if (!virtqueue_is_packed(vq)) { 3511 kfree(vq->split.desc_state); 3512 kfree(vq->split.desc_extra); 3513 } 3514 } 3515 3516 void vring_del_virtqueue(struct virtqueue *_vq) 3517 { 3518 struct vring_virtqueue *vq = to_vvq(_vq); 3519 3520 spin_lock(&vq->vq.vdev->vqs_list_lock); 3521 list_del(&_vq->list); 3522 spin_unlock(&vq->vq.vdev->vqs_list_lock); 3523 3524 vring_free(_vq); 3525 3526 kfree(vq); 3527 } 3528 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 3529 3530 u32 vring_notification_data(struct virtqueue *_vq) 3531 { 3532 struct vring_virtqueue *vq = to_vvq(_vq); 3533 u16 next; 3534 3535 if (virtqueue_is_packed(vq)) 3536 next = (vq->packed.next_avail_idx & 3537 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | 3538 vq->packed.avail_wrap_counter << 3539 VRING_PACKED_EVENT_F_WRAP_CTR; 3540 else 3541 next = vq->split.avail_idx_shadow; 3542 3543 return next << 16 | _vq->index; 3544 } 3545 EXPORT_SYMBOL_GPL(vring_notification_data); 3546 3547 /* Manipulates transport-specific feature bits. */ 3548 void vring_transport_features(struct virtio_device *vdev) 3549 { 3550 unsigned int i; 3551 3552 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 3553 switch (i) { 3554 case VIRTIO_RING_F_INDIRECT_DESC: 3555 break; 3556 case VIRTIO_RING_F_EVENT_IDX: 3557 break; 3558 case VIRTIO_F_VERSION_1: 3559 break; 3560 case VIRTIO_F_ACCESS_PLATFORM: 3561 break; 3562 case VIRTIO_F_RING_PACKED: 3563 break; 3564 case VIRTIO_F_ORDER_PLATFORM: 3565 break; 3566 case VIRTIO_F_NOTIFICATION_DATA: 3567 break; 3568 case VIRTIO_F_IN_ORDER: 3569 break; 3570 default: 3571 /* We don't understand this bit. */ 3572 __virtio_clear_bit(vdev, i); 3573 } 3574 } 3575 } 3576 EXPORT_SYMBOL_GPL(vring_transport_features); 3577 3578 /** 3579 * virtqueue_get_vring_size - return the size of the virtqueue's vring 3580 * @_vq: the struct virtqueue containing the vring of interest. 3581 * 3582 * Returns the size of the vring. This is mainly used for boasting to 3583 * userspace. Unlike other operations, this need not be serialized. 3584 */ 3585 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) 3586 { 3587 3588 const struct vring_virtqueue *vq = to_vvq(_vq); 3589 3590 return virtqueue_is_packed(vq) ? vq->packed.vring.num : 3591 vq->split.vring.num; 3592 } 3593 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 3594 3595 /* 3596 * This function should only be called by the core, not directly by the driver. 3597 */ 3598 void __virtqueue_break(struct virtqueue *_vq) 3599 { 3600 struct vring_virtqueue *vq = to_vvq(_vq); 3601 3602 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3603 WRITE_ONCE(vq->broken, true); 3604 } 3605 EXPORT_SYMBOL_GPL(__virtqueue_break); 3606 3607 /* 3608 * This function should only be called by the core, not directly by the driver. 3609 */ 3610 void __virtqueue_unbreak(struct virtqueue *_vq) 3611 { 3612 struct vring_virtqueue *vq = to_vvq(_vq); 3613 3614 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3615 WRITE_ONCE(vq->broken, false); 3616 } 3617 EXPORT_SYMBOL_GPL(__virtqueue_unbreak); 3618 3619 bool virtqueue_is_broken(const struct virtqueue *_vq) 3620 { 3621 const struct vring_virtqueue *vq = to_vvq(_vq); 3622 3623 return READ_ONCE(vq->broken); 3624 } 3625 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 3626 3627 /* 3628 * This should prevent the device from being used, allowing drivers to 3629 * recover. You may need to grab appropriate locks to flush. 3630 */ 3631 void virtio_break_device(struct virtio_device *dev) 3632 { 3633 struct virtqueue *_vq; 3634 3635 spin_lock(&dev->vqs_list_lock); 3636 list_for_each_entry(_vq, &dev->vqs, list) { 3637 struct vring_virtqueue *vq = to_vvq(_vq); 3638 3639 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3640 WRITE_ONCE(vq->broken, true); 3641 } 3642 spin_unlock(&dev->vqs_list_lock); 3643 } 3644 EXPORT_SYMBOL_GPL(virtio_break_device); 3645 3646 /* 3647 * This should allow the device to be used by the driver. You may 3648 * need to grab appropriate locks to flush the write to 3649 * vq->broken. This should only be used in some specific case e.g 3650 * (probing and restoring). This function should only be called by the 3651 * core, not directly by the driver. 3652 */ 3653 void __virtio_unbreak_device(struct virtio_device *dev) 3654 { 3655 struct virtqueue *_vq; 3656 3657 spin_lock(&dev->vqs_list_lock); 3658 list_for_each_entry(_vq, &dev->vqs, list) { 3659 struct vring_virtqueue *vq = to_vvq(_vq); 3660 3661 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3662 WRITE_ONCE(vq->broken, false); 3663 } 3664 spin_unlock(&dev->vqs_list_lock); 3665 } 3666 EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 3667 3668 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) 3669 { 3670 const struct vring_virtqueue *vq = to_vvq(_vq); 3671 3672 BUG_ON(!vq->we_own_ring); 3673 3674 if (virtqueue_is_packed(vq)) 3675 return vq->packed.ring_dma_addr; 3676 3677 return vq->split.queue_dma_addr; 3678 } 3679 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 3680 3681 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) 3682 { 3683 const struct vring_virtqueue *vq = to_vvq(_vq); 3684 3685 BUG_ON(!vq->we_own_ring); 3686 3687 if (virtqueue_is_packed(vq)) 3688 return vq->packed.driver_event_dma_addr; 3689 3690 return vq->split.queue_dma_addr + 3691 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 3692 } 3693 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 3694 3695 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) 3696 { 3697 const struct vring_virtqueue *vq = to_vvq(_vq); 3698 3699 BUG_ON(!vq->we_own_ring); 3700 3701 if (virtqueue_is_packed(vq)) 3702 return vq->packed.device_event_dma_addr; 3703 3704 return vq->split.queue_dma_addr + 3705 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 3706 } 3707 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 3708 3709 /* Only available for split ring */ 3710 const struct vring *virtqueue_get_vring(const struct virtqueue *vq) 3711 { 3712 return &to_vvq(vq)->split.vring; 3713 } 3714 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 3715 3716 /** 3717 * virtqueue_map_alloc_coherent - alloc coherent mapping 3718 * @vdev: the virtio device we are talking to 3719 * @map: metadata for performing mapping 3720 * @size: the size of the buffer 3721 * @map_handle: the pointer to the mapped address 3722 * @gfp: allocation flag (GFP_XXX) 3723 * 3724 * return virtual address or NULL on error 3725 */ 3726 void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, 3727 union virtio_map map, 3728 size_t size, dma_addr_t *map_handle, 3729 gfp_t gfp) 3730 { 3731 if (vdev->map) 3732 return vdev->map->alloc(map, size, 3733 map_handle, gfp); 3734 else 3735 return dma_alloc_coherent(map.dma_dev, size, 3736 map_handle, gfp); 3737 } 3738 EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent); 3739 3740 /** 3741 * virtqueue_map_free_coherent - free coherent mapping 3742 * @vdev: the virtio device we are talking to 3743 * @map: metadata for performing mapping 3744 * @size: the size of the buffer 3745 * @vaddr: the virtual address that needs to be freed 3746 * @map_handle: the mapped address that needs to be freed 3747 * 3748 */ 3749 void virtqueue_map_free_coherent(struct virtio_device *vdev, 3750 union virtio_map map, size_t size, void *vaddr, 3751 dma_addr_t map_handle) 3752 { 3753 if (vdev->map) 3754 vdev->map->free(map, size, vaddr, 3755 map_handle, 0); 3756 else 3757 dma_free_coherent(map.dma_dev, size, vaddr, map_handle); 3758 } 3759 EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent); 3760 3761 /** 3762 * virtqueue_map_page_attrs - map a page to the device 3763 * @_vq: the virtqueue we are talking to 3764 * @page: the page that will be mapped by the device 3765 * @offset: the offset in the page for a buffer 3766 * @size: the buffer size 3767 * @dir: mapping direction 3768 * @attrs: mapping attributes 3769 * 3770 * Returns mapped address. Caller should check that by virtqueue_map_mapping_error(). 3771 */ 3772 dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, 3773 struct page *page, 3774 unsigned long offset, 3775 size_t size, 3776 enum dma_data_direction dir, 3777 unsigned long attrs) 3778 { 3779 const struct vring_virtqueue *vq = to_vvq(_vq); 3780 struct virtio_device *vdev = _vq->vdev; 3781 3782 if (vdev->map) 3783 return vdev->map->map_page(vq->map, 3784 page, offset, size, 3785 dir, attrs); 3786 3787 return dma_map_page_attrs(vring_dma_dev(vq), 3788 page, offset, size, 3789 dir, attrs); 3790 } 3791 EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs); 3792 3793 /** 3794 * virtqueue_unmap_page_attrs - map a page to the device 3795 * @_vq: the virtqueue we are talking to 3796 * @map_handle: the mapped address 3797 * @size: the buffer size 3798 * @dir: mapping direction 3799 * @attrs: unmapping attributes 3800 */ 3801 void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, 3802 dma_addr_t map_handle, 3803 size_t size, enum dma_data_direction dir, 3804 unsigned long attrs) 3805 { 3806 const struct vring_virtqueue *vq = to_vvq(_vq); 3807 struct virtio_device *vdev = _vq->vdev; 3808 3809 if (vdev->map) 3810 vdev->map->unmap_page(vq->map, 3811 map_handle, size, dir, attrs); 3812 else 3813 dma_unmap_page_attrs(vring_dma_dev(vq), map_handle, 3814 size, dir, attrs); 3815 } 3816 EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs); 3817 3818 /** 3819 * virtqueue_map_single_attrs - map DMA for _vq 3820 * @_vq: the struct virtqueue we're talking about. 3821 * @ptr: the pointer of the buffer to do dma 3822 * @size: the size of the buffer to do dma 3823 * @dir: DMA direction 3824 * @attrs: DMA Attrs 3825 * 3826 * The caller calls this to do dma mapping in advance. The DMA address can be 3827 * passed to this _vq when it is in pre-mapped mode. 3828 * 3829 * return mapped address. Caller should check that by virtqueue_map_mapping_error(). 3830 */ 3831 dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, 3832 size_t size, 3833 enum dma_data_direction dir, 3834 unsigned long attrs) 3835 { 3836 const struct vring_virtqueue *vq = to_vvq(_vq); 3837 3838 if (!vq->use_map_api) { 3839 kmsan_handle_dma(virt_to_phys(ptr), size, dir); 3840 return (dma_addr_t)virt_to_phys(ptr); 3841 } 3842 3843 /* DMA must never operate on areas that might be remapped. */ 3844 if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr), 3845 "rejecting DMA map of vmalloc memory\n")) 3846 return DMA_MAPPING_ERROR; 3847 3848 return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr), 3849 offset_in_page(ptr), size, dir, attrs); 3850 } 3851 EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs); 3852 3853 /** 3854 * virtqueue_unmap_single_attrs - unmap map for _vq 3855 * @_vq: the struct virtqueue we're talking about. 3856 * @addr: the dma address to unmap 3857 * @size: the size of the buffer 3858 * @dir: DMA direction 3859 * @attrs: DMA Attrs 3860 * 3861 * Unmap the address that is mapped by the virtqueue_map_* APIs. 3862 * 3863 */ 3864 void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, 3865 dma_addr_t addr, 3866 size_t size, enum dma_data_direction dir, 3867 unsigned long attrs) 3868 { 3869 const struct vring_virtqueue *vq = to_vvq(_vq); 3870 3871 if (!vq->use_map_api) 3872 return; 3873 3874 virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs); 3875 } 3876 EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs); 3877 3878 /** 3879 * virtqueue_map_mapping_error - check dma address 3880 * @_vq: the struct virtqueue we're talking about. 3881 * @addr: DMA address 3882 * 3883 * Returns 0 means dma valid. Other means invalid dma address. 3884 */ 3885 int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) 3886 { 3887 const struct vring_virtqueue *vq = to_vvq(_vq); 3888 3889 return vring_mapping_error(vq, addr); 3890 } 3891 EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); 3892 3893 /** 3894 * virtqueue_map_need_sync - check a dma address needs sync 3895 * @_vq: the struct virtqueue we're talking about. 3896 * @addr: DMA address 3897 * 3898 * Check if the dma address mapped by the virtqueue_map_* APIs needs to be 3899 * synchronized 3900 * 3901 * return bool 3902 */ 3903 bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr) 3904 { 3905 const struct vring_virtqueue *vq = to_vvq(_vq); 3906 struct virtio_device *vdev = _vq->vdev; 3907 3908 if (!vq->use_map_api) 3909 return false; 3910 3911 if (vdev->map) 3912 return vdev->map->need_sync(vq->map, addr); 3913 else 3914 return dma_need_sync(vring_dma_dev(vq), addr); 3915 } 3916 EXPORT_SYMBOL_GPL(virtqueue_map_need_sync); 3917 3918 /** 3919 * virtqueue_map_sync_single_range_for_cpu - map sync for cpu 3920 * @_vq: the struct virtqueue we're talking about. 3921 * @addr: DMA address 3922 * @offset: DMA address offset 3923 * @size: buf size for sync 3924 * @dir: DMA direction 3925 * 3926 * Before calling this function, use virtqueue_map_need_sync() to confirm that 3927 * the DMA address really needs to be synchronized 3928 * 3929 */ 3930 void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, 3931 dma_addr_t addr, 3932 unsigned long offset, size_t size, 3933 enum dma_data_direction dir) 3934 { 3935 const struct vring_virtqueue *vq = to_vvq(_vq); 3936 struct virtio_device *vdev = _vq->vdev; 3937 3938 if (!vq->use_map_api) 3939 return; 3940 3941 if (vdev->map) 3942 vdev->map->sync_single_for_cpu(vq->map, 3943 addr + offset, size, dir); 3944 else 3945 dma_sync_single_range_for_cpu(vring_dma_dev(vq), 3946 addr, offset, size, dir); 3947 } 3948 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); 3949 3950 /** 3951 * virtqueue_map_sync_single_range_for_device - map sync for device 3952 * @_vq: the struct virtqueue we're talking about. 3953 * @addr: DMA address 3954 * @offset: DMA address offset 3955 * @size: buf size for sync 3956 * @dir: DMA direction 3957 * 3958 * Before calling this function, use virtqueue_map_need_sync() to confirm that 3959 * the DMA address really needs to be synchronized 3960 */ 3961 void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, 3962 dma_addr_t addr, 3963 unsigned long offset, size_t size, 3964 enum dma_data_direction dir) 3965 { 3966 const struct vring_virtqueue *vq = to_vvq(_vq); 3967 struct virtio_device *vdev = _vq->vdev; 3968 3969 if (!vq->use_map_api) 3970 return; 3971 3972 if (vdev->map) 3973 vdev->map->sync_single_for_device(vq->map, 3974 addr + offset, 3975 size, dir); 3976 else 3977 dma_sync_single_range_for_device(vring_dma_dev(vq), addr, 3978 offset, size, dir); 3979 } 3980 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); 3981 3982 MODULE_DESCRIPTION("Virtio ring implementation"); 3983 MODULE_LICENSE("GPL"); 3984