// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/kmsan.h>
#include <linux/spinlock.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		BUG(); \
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq) \
	do { \
		if ((_vq)->in_use) \
			panic("%s:in_use = %i\n", \
			      (_vq)->vq.name, (_vq)->in_use); \
		(_vq)->in_use = __LINE__; \
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
#define LAST_ADD_TIME_UPDATE(_vq) \
	do { \
		ktime_t now = ktime_get(); \
		\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid) \
			WARN_ON(ktime_to_ms(ktime_sub(now, \
				(_vq)->last_add_time)) > 100); \
		(_vq)->last_add_time = now; \
		(_vq)->last_add_time_valid = true; \
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq) \
	do { \
		if ((_vq)->last_add_time_valid) { \
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				(_vq)->last_add_time)) > 100); \
		} \
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq) \
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&_vq->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		(_vq)->broken = true; \
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

enum vq_layout {
	VQ_LAYOUT_SPLIT = 0,
	VQ_LAYOUT_PACKED,
	VQ_LAYOUT_SPLIT_IN_ORDER,
	VQ_LAYOUT_PACKED_IN_ORDER,
};

struct vring_desc_state_split {
	void *data;			/* Data for callback. */

	/* Indirect desc table and extra table, if any. The two are
	 * allocated together, so we don't put extra pressure on the
	 * memory allocator.
	 */
	struct vring_desc *indir_desc;
	u32 total_in_len;
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */

	/* Indirect desc table and extra table, if any. The two are
	 * allocated together, so we don't put extra pressure on the
	 * memory allocator.
	 */
	struct vring_packed_desc *indir_desc;
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
	u32 total_in_len;		/* In length for the skipped buffer. */
};

struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};

struct vring_virtqueue_split {
	/* Actual memory layout for this queue. */
	struct vring vring;

	/* Last written value to avail->flags */
	u16 avail_flags_shadow;

	/*
	 * Last written value to avail->idx in
	 * guest byte order.
	 */
	u16 avail_idx_shadow;

	/* Per-descriptor state.
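	 * desc_state[] keeps the driver's token (and any indirect table) for
	 * each head descriptor; desc_extra[] shadows each descriptor's DMA
	 * address, length and flags so buffers can be unmapped without
	 * reading the ring back.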
	 */
	struct vring_desc_state_split *desc_state;
	struct vring_desc_extra *desc_extra;

	/* DMA address and size information */
	dma_addr_t queue_dma_addr;
	size_t queue_size_in_bytes;

	/*
	 * The parameters used to create this vring, kept so that a new
	 * vring can be allocated with the same settings (e.g. on resize).
	 */
	u32 vring_align;
	bool may_reduce_num;
};

struct vring_virtqueue_packed {
	/* Actual memory layout for this queue. */
	struct {
		unsigned int num;
		struct vring_packed_desc *desc;
		struct vring_packed_desc_event *driver;
		struct vring_packed_desc_event *device;
	} vring;

	/* Driver ring wrap counter. */
	bool avail_wrap_counter;

	/* Avail used flags. */
	u16 avail_used_flags;

	/* Index of the next avail descriptor. */
	u16 next_avail_idx;

	/*
	 * Last written value to driver->flags in
	 * guest byte order.
	 */
	u16 event_flags_shadow;

	/* Per-descriptor state. */
	struct vring_desc_state_packed *desc_state;
	struct vring_desc_extra *desc_extra;

	/* DMA address and size information */
	dma_addr_t ring_dma_addr;
	dma_addr_t driver_event_dma_addr;
	dma_addr_t device_event_dma_addr;
	size_t ring_size_in_bytes;
	size_t event_size_in_bytes;
};

struct vring_virtqueue;

struct virtqueue_ops {
	int (*add)(struct vring_virtqueue *vq, struct scatterlist *sgs[],
		   unsigned int total_sg, unsigned int out_sgs,
		   unsigned int in_sgs, void *data,
		   void *ctx, bool premapped, gfp_t gfp,
		   unsigned long attr);
	void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx);
	bool (*kick_prepare)(struct vring_virtqueue *vq);
	void (*disable_cb)(struct vring_virtqueue *vq);
	bool (*enable_cb_delayed)(struct vring_virtqueue *vq);
	unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq);
	bool (*poll)(const struct vring_virtqueue *vq,
		     unsigned int last_used_idx);
	void *(*detach_unused_buf)(struct vring_virtqueue *vq);
	bool (*more_used)(const struct vring_virtqueue *vq);
	int (*resize)(struct vring_virtqueue *vq, u32 num);
	void (*reset)(struct vring_virtqueue *vq);
};

struct vring_virtqueue {
	struct virtqueue vq;

	/* Is DMA API used? */
	bool use_map_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	enum vq_layout layout;

	/*
	 * Without IN_ORDER it's the head of the free buffer list. With
	 * IN_ORDER and SPLIT, it's the next available buffer
	 * index. With IN_ORDER and PACKED, it's unused.
	 */
	unsigned int free_head;

	/*
	 * With IN_ORDER, once we see an in-order batch, this stores the
	 * batch's last entry until we have returned its last buffer.
	 * After that, id is set to UINT_MAX to mark it invalid.
	 * Unused without IN_ORDER.
	 */
	struct used_entry {
		u32 id;
		u32 len;
	} batch_last;

	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen.
	 * For the split ring, it just contains the last used index.
	 * For the packed ring:
	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
	 */
	u16 last_used_idx;

	/* With IN_ORDER and SPLIT, the last descriptor id we used to
	 * detach a buffer.
	 */
	u16 last_used;

	/* Hint for event idx: already triggered no need to disable. */
	bool event_triggered;

	union {
		/* Available for split ring */
		struct vring_virtqueue_split split;

		/* Available for packed ring */
		struct vring_virtqueue_packed packed;
	};

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

	union virtio_map map;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};

static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
static void vring_free(struct virtqueue *_vq);

/*
 * Helpers.
 */

#define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)


static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq)
{
	return vq->layout == VQ_LAYOUT_PACKED ||
	       vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
}

static inline bool virtqueue_is_in_order(const struct vring_virtqueue *vq)
{
	return vq->layout == VQ_LAYOUT_SPLIT_IN_ORDER ||
	       vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
}

static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
				   unsigned int total_sg)
{
	/*
	 * If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold
	 */
	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
}

/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
 */

static bool vring_use_map_api(const struct virtio_device *vdev)
{
	if (!virtio_has_dma_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
				    const struct vring_desc_extra *extra)
{
	return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR);
}

size_t virtio_max_dma_size(const struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_map_api(vdev)) {
		if (vdev->map) {
			max_segment_size =
				vdev->map->max_mapping_size(vdev->vmap);
		} else
			max_segment_size =
				dma_max_mapping_size(vdev->dev.parent);
	}

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *map_handle, gfp_t flag,
			       union virtio_map map)
{
	if (vring_use_map_api(vdev)) {
		return virtqueue_map_alloc_coherent(vdev, map, size,
						    map_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*map_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*map_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t map_handle,
			     union virtio_map map)
{
	if (vring_use_map_api(vdev))
		virtqueue_map_free_coherent(vdev, map, size,
					    queue, map_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.
 */
static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->map.dma_dev;
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	struct virtio_device *vdev = vq->vq.vdev;

	if (!vq->use_map_api)
		return 0;

	if (vdev->map)
		return vdev->map->mapping_error(vq->map, addr);
	else
		return dma_mapping_error(vring_dma_dev(vq), addr);
}

/* Map one sg entry. */
static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
			    enum dma_data_direction direction, dma_addr_t *addr,
			    u32 *len, bool premapped, unsigned long attr)
{
	if (premapped) {
		*addr = sg_dma_address(sg);
		*len = sg_dma_len(sg);
		return 0;
	}

	*len = sg->length;

	if (!vq->use_map_api) {
		/*
		 * If DMA is not used, KMSAN doesn't know that the scatterlist
		 * is initialized by the hardware. Explicitly check/unpoison it
		 * depending on the direction.
		 */
		kmsan_handle_dma(sg_phys(sg), sg->length, direction);
		*addr = (dma_addr_t)sg_phys(sg);
		return 0;
	}

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	*addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg),
					 sg->offset, sg->length,
					 direction, attr);

	if (vring_mapping_error(vq, *addr))
		return -ENOMEM;

	return 0;
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_map_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return virtqueue_map_single_attrs(&vq->vq, cpu_addr,
					  size, direction, 0);
}

static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
{
	vq->vq.num_free = num;

	if (virtqueue_is_packed(vq))
		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	else
		vq->last_used_idx = 0;

	vq->last_used = 0;

	vq->event_triggered = false;
	vq->num_added = 0;

#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif
}


/*
 * Split ring specific functions - *_split().
 */

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  struct vring_desc_extra *extra)
{
	u16 flags;

	flags = extra->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		if (!vq->use_map_api)
			goto out;
	} else if (!vring_need_unmap_buffer(vq, extra))
		goto out;

	virtqueue_unmap_page_attrs(&vq->vq,
				   extra->addr,
				   extra->len,
				   (flags & VRING_DESC_F_WRITE) ?
				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
				   0);

out:
	return extra->next;
}

static struct vring_desc *alloc_indirect_split(struct vring_virtqueue *vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc_extra *extra;
	struct vring_desc *desc;
	unsigned int i, size;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg;

	desc = kmalloc(size, gfp);
	if (!desc)
		return NULL;

	extra = (struct vring_desc_extra *)&desc[total_sg];

	for (i = 0; i < total_sg; i++)
		extra[i].next = i + 1;

	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct vring_virtqueue *vq,
						    struct vring_desc *desc,
						    struct vring_desc_extra *extra,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags, bool premapped)
{
	struct virtio_device *vdev = vq->vq.vdev;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vdev, flags);
	desc[i].addr = cpu_to_virtio64(vdev, addr);
	desc[i].len = cpu_to_virtio32(vdev, len);

	extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
	extra[i].len = len;
	extra[i].flags = flags;

	next = extra[i].next;

	desc[i].next = cpu_to_virtio16(vdev, next);

	return next;
}
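
/*
 * Illustrative sketch (not used by this file): drivers normally reach
 * virtqueue_add_split()/virtqueue_add_packed() through the public
 * virtqueue_add_sgs() wrapper. Assuming a hypothetical request with one
 * driver-written header and one device-written status byte:
 *
 *	struct scatterlist hdr, status;
 *	struct scatterlist *sgs[] = { &hdr, &status };
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (err)
 *		return err;	// e.g. -ENOSPC when the ring is full
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 */
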
static inline int virtqueue_add_split(struct vring_virtqueue *vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      bool premapped,
				      gfp_t gfp,
				      unsigned long attr)
{
	struct vring_desc_extra *extra;
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, err_idx, sg_count = 0;
	/* Total length for in-order */
	unsigned int total_in_len = 0;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(vq, total_sg))
		desc = alloc_indirect_split(vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
		extra = (struct vring_desc_extra *)&desc[total_sg];
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		extra = vq->split.desc_extra;
		i = head;
		descs_used = total_sg;
	}

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;
			u16 flags = 0;

			if (++sg_count != total_sg)
				flags |= VRING_DESC_F_NEXT;

			if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len,
					     premapped, attr))
				goto unmap_release;

			/* Note that we trust the indirect descriptor
			 * table since it uses stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
						     len, flags, premapped);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;
			u16 flags = VRING_DESC_F_WRITE;

			if (++sg_count != total_sg)
				flags |= VRING_DESC_F_NEXT;

			if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len,
					     premapped, attr))
				goto unmap_release;

			/* Note that we trust the indirect descriptor
			 * table since it uses stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
						     len, flags, premapped);
			total_in_len += len;
		}
	}

	if (indirect) {
		/* Now that the indirect table is filled in, map it.
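		 * The table itself is always mapped DMA_TO_DEVICE: the device
		 * only reads these descriptors, even when they point at
		 * device-writable buffers.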
*/ 715 dma_addr_t addr = vring_map_single( 716 vq, desc, total_sg * sizeof(struct vring_desc), 717 DMA_TO_DEVICE); 718 if (vring_mapping_error(vq, addr)) 719 goto unmap_release; 720 721 virtqueue_add_desc_split(vq, vq->split.vring.desc, 722 vq->split.desc_extra, 723 head, addr, 724 total_sg * sizeof(struct vring_desc), 725 VRING_DESC_F_INDIRECT, false); 726 } 727 728 /* We're using some buffers from the free list. */ 729 vq->vq.num_free -= descs_used; 730 731 /* Update free pointer */ 732 if (virtqueue_is_in_order(vq)) { 733 vq->free_head += descs_used; 734 if (vq->free_head >= vq->split.vring.num) 735 vq->free_head -= vq->split.vring.num; 736 vq->split.desc_state[head].total_in_len = total_in_len; 737 } else if (indirect) 738 vq->free_head = vq->split.desc_extra[head].next; 739 else 740 vq->free_head = i; 741 742 /* Store token and indirect buffer state. */ 743 vq->split.desc_state[head].data = data; 744 if (indirect) 745 vq->split.desc_state[head].indir_desc = desc; 746 else 747 vq->split.desc_state[head].indir_desc = ctx; 748 749 /* Put entry in available array (but don't update avail->idx until they 750 * do sync). */ 751 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 752 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(vq->vq.vdev, head); 753 754 /* Descriptors and available array need to be set before we expose the 755 * new available array entries. */ 756 virtio_wmb(vq->weak_barriers); 757 vq->split.avail_idx_shadow++; 758 vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev, 759 vq->split.avail_idx_shadow); 760 vq->num_added++; 761 762 pr_debug("Added buffer head %i to %p\n", head, vq); 763 END_USE(vq); 764 765 /* This is very unlikely, but theoretically possible. Kick 766 * just in case. */ 767 if (unlikely(vq->num_added == (1 << 16) - 1)) 768 virtqueue_kick(&vq->vq); 769 770 return 0; 771 772 unmap_release: 773 err_idx = i; 774 775 if (indirect) 776 i = 0; 777 else 778 i = head; 779 780 for (n = 0; n < total_sg; n++) { 781 if (i == err_idx) 782 break; 783 784 i = vring_unmap_one_split(vq, &extra[i]); 785 } 786 787 if (indirect) 788 kfree(desc); 789 790 END_USE(vq); 791 return -ENOMEM; 792 } 793 794 static bool virtqueue_kick_prepare_split(struct vring_virtqueue *vq) 795 { 796 u16 new, old; 797 bool needs_kick; 798 799 START_USE(vq); 800 /* We need to expose available array entries before checking avail 801 * event. */ 802 virtio_mb(vq->weak_barriers); 803 804 old = vq->split.avail_idx_shadow - vq->num_added; 805 new = vq->split.avail_idx_shadow; 806 vq->num_added = 0; 807 808 LAST_ADD_TIME_CHECK(vq); 809 LAST_ADD_TIME_INVALID(vq); 810 811 if (vq->event) { 812 needs_kick = vring_need_event(virtio16_to_cpu(vq->vq.vdev, 813 vring_avail_event(&vq->split.vring)), 814 new, old); 815 } else { 816 needs_kick = !(vq->split.vring.used->flags & 817 cpu_to_virtio16(vq->vq.vdev, 818 VRING_USED_F_NO_NOTIFY)); 819 } 820 END_USE(vq); 821 return needs_kick; 822 } 823 824 static void detach_indirect_split(struct vring_virtqueue *vq, 825 unsigned int head) 826 { 827 struct vring_desc_extra *extra = vq->split.desc_extra; 828 struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc; 829 unsigned int j; 830 u32 len, num; 831 832 /* Free the indirect table, if any, now that it's unmapped. 
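	 * The indirect descriptors and their extra[] bookkeeping were
	 * allocated as a single block by alloc_indirect_split(), so the one
	 * kfree() below releases both.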
*/ 833 if (!indir_desc) 834 return; 835 len = vq->split.desc_extra[head].len; 836 837 BUG_ON(!(vq->split.desc_extra[head].flags & 838 VRING_DESC_F_INDIRECT)); 839 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 840 841 num = len / sizeof(struct vring_desc); 842 843 extra = (struct vring_desc_extra *)&indir_desc[num]; 844 845 if (vq->use_map_api) { 846 for (j = 0; j < num; j++) 847 vring_unmap_one_split(vq, &extra[j]); 848 } 849 850 kfree(indir_desc); 851 vq->split.desc_state[head].indir_desc = NULL; 852 } 853 854 static unsigned detach_buf_split_in_order(struct vring_virtqueue *vq, 855 unsigned int head, 856 void **ctx) 857 { 858 struct vring_desc_extra *extra; 859 unsigned int i; 860 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 861 862 /* Clear data ptr. */ 863 vq->split.desc_state[head].data = NULL; 864 865 extra = vq->split.desc_extra; 866 867 /* Put back on free list: unmap first-level descriptors and find end */ 868 i = head; 869 870 while (vq->split.vring.desc[i].flags & nextflag) { 871 i = vring_unmap_one_split(vq, &extra[i]); 872 vq->vq.num_free++; 873 } 874 875 vring_unmap_one_split(vq, &extra[i]); 876 877 /* Plus final descriptor */ 878 vq->vq.num_free++; 879 880 if (vq->indirect) 881 detach_indirect_split(vq, head); 882 else if (ctx) 883 *ctx = vq->split.desc_state[head].indir_desc; 884 885 return i; 886 } 887 888 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 889 void **ctx) 890 { 891 unsigned int i = detach_buf_split_in_order(vq, head, ctx); 892 893 vq->split.desc_extra[i].next = vq->free_head; 894 vq->free_head = head; 895 } 896 897 static bool virtqueue_poll_split(const struct vring_virtqueue *vq, 898 unsigned int last_used_idx) 899 { 900 return (u16)last_used_idx != virtio16_to_cpu(vq->vq.vdev, 901 vq->split.vring.used->idx); 902 } 903 904 static bool more_used_split(const struct vring_virtqueue *vq) 905 { 906 return virtqueue_poll_split(vq, vq->last_used_idx); 907 } 908 909 static bool more_used_split_in_order(const struct vring_virtqueue *vq) 910 { 911 if (vq->batch_last.id != UINT_MAX) 912 return true; 913 914 return virtqueue_poll_split(vq, vq->last_used_idx); 915 } 916 917 static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq, 918 unsigned int *len, 919 void **ctx) 920 { 921 void *ret; 922 unsigned int i; 923 u16 last_used; 924 925 START_USE(vq); 926 927 if (unlikely(vq->broken)) { 928 END_USE(vq); 929 return NULL; 930 } 931 932 if (!more_used_split(vq)) { 933 pr_debug("No more buffers in queue\n"); 934 END_USE(vq); 935 return NULL; 936 } 937 938 /* Only get used array entries after they have been exposed by host. */ 939 virtio_rmb(vq->weak_barriers); 940 941 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 942 i = virtio32_to_cpu(vq->vq.vdev, 943 vq->split.vring.used->ring[last_used].id); 944 *len = virtio32_to_cpu(vq->vq.vdev, 945 vq->split.vring.used->ring[last_used].len); 946 947 if (unlikely(i >= vq->split.vring.num)) { 948 BAD_RING(vq, "id %u out of range\n", i); 949 return NULL; 950 } 951 if (unlikely(!vq->split.desc_state[i].data)) { 952 BAD_RING(vq, "id %u is not a head!\n", i); 953 return NULL; 954 } 955 956 /* detach_buf_split clears data, so grab it now. */ 957 ret = vq->split.desc_state[i].data; 958 detach_buf_split(vq, i, ctx); 959 vq->last_used_idx++; 960 /* If we expect an interrupt for the next entry, tell host 961 * by writing event index and flush out the write before 962 * the read in the next get_buf call. 
*/ 963 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 964 virtio_store_mb(vq->weak_barriers, 965 &vring_used_event(&vq->split.vring), 966 cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx)); 967 968 LAST_ADD_TIME_INVALID(vq); 969 970 END_USE(vq); 971 return ret; 972 } 973 974 static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq, 975 unsigned int *len, 976 void **ctx) 977 { 978 void *ret; 979 unsigned int num = vq->split.vring.num; 980 unsigned int num_free = vq->vq.num_free; 981 u16 last_used, last_used_idx; 982 983 START_USE(vq); 984 985 if (unlikely(vq->broken)) { 986 END_USE(vq); 987 return NULL; 988 } 989 990 last_used = vq->last_used & (num - 1); 991 last_used_idx = vq->last_used_idx & (num - 1); 992 993 if (vq->batch_last.id == UINT_MAX) { 994 if (!more_used_split_in_order(vq)) { 995 pr_debug("No more buffers in queue\n"); 996 END_USE(vq); 997 return NULL; 998 } 999 1000 /* 1001 * Only get used array entries after they have been 1002 * exposed by host. 1003 */ 1004 virtio_rmb(vq->weak_barriers); 1005 1006 vq->batch_last.id = virtio32_to_cpu(vq->vq.vdev, 1007 vq->split.vring.used->ring[last_used_idx].id); 1008 vq->batch_last.len = virtio32_to_cpu(vq->vq.vdev, 1009 vq->split.vring.used->ring[last_used_idx].len); 1010 } 1011 1012 if (vq->batch_last.id == last_used) { 1013 vq->batch_last.id = UINT_MAX; 1014 *len = vq->batch_last.len; 1015 } else { 1016 *len = vq->split.desc_state[last_used].total_in_len; 1017 } 1018 1019 if (unlikely(!vq->split.desc_state[last_used].data)) { 1020 BAD_RING(vq, "id %u is not a head!\n", last_used); 1021 return NULL; 1022 } 1023 1024 /* detach_buf_split clears data, so grab it now. */ 1025 ret = vq->split.desc_state[last_used].data; 1026 detach_buf_split_in_order(vq, last_used, ctx); 1027 1028 vq->last_used_idx++; 1029 vq->last_used += (vq->vq.num_free - num_free); 1030 /* If we expect an interrupt for the next entry, tell host 1031 * by writing event index and flush out the write before 1032 * the read in the next get_buf call. */ 1033 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 1034 virtio_store_mb(vq->weak_barriers, 1035 &vring_used_event(&vq->split.vring), 1036 cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx)); 1037 1038 LAST_ADD_TIME_INVALID(vq); 1039 1040 END_USE(vq); 1041 return ret; 1042 } 1043 1044 static void virtqueue_disable_cb_split(struct vring_virtqueue *vq) 1045 { 1046 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 1047 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 1048 1049 /* 1050 * If device triggered an event already it won't trigger one again: 1051 * no need to disable. 1052 */ 1053 if (vq->event_triggered) 1054 return; 1055 1056 if (vq->event) 1057 /* TODO: this is a hack. Figure out a cleaner value to write. */ 1058 vring_used_event(&vq->split.vring) = 0x0; 1059 else 1060 vq->split.vring.avail->flags = 1061 cpu_to_virtio16(vq->vq.vdev, 1062 vq->split.avail_flags_shadow); 1063 } 1064 } 1065 1066 static unsigned int virtqueue_enable_cb_prepare_split(struct vring_virtqueue *vq) 1067 { 1068 u16 last_used_idx; 1069 1070 START_USE(vq); 1071 1072 /* We optimistically turn back on interrupts, then check if there was 1073 * more to do. */ 1074 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 1075 * either clear the flags bit or point the event index at the next 1076 * entry. Always do both to keep code simple. 
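	 *
	 * The opaque value returned here is what the public
	 * virtqueue_enable_cb_prepare() wrapper hands back; callers close the
	 * race with a concurrent interrupt roughly like this (illustrative
	 * sketch):
	 *
	 *	opaque = virtqueue_enable_cb_prepare(vq);
	 *	if (virtqueue_poll(vq, opaque)) {
	 *		virtqueue_disable_cb(vq);
	 *		// more buffers arrived meanwhile, keep processing
	 *	}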
*/ 1077 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 1078 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 1079 if (!vq->event) 1080 vq->split.vring.avail->flags = 1081 cpu_to_virtio16(vq->vq.vdev, 1082 vq->split.avail_flags_shadow); 1083 } 1084 vring_used_event(&vq->split.vring) = cpu_to_virtio16(vq->vq.vdev, 1085 last_used_idx = vq->last_used_idx); 1086 END_USE(vq); 1087 return last_used_idx; 1088 } 1089 1090 static bool virtqueue_enable_cb_delayed_split(struct vring_virtqueue *vq) 1091 { 1092 u16 bufs; 1093 1094 START_USE(vq); 1095 1096 /* We optimistically turn back on interrupts, then check if there was 1097 * more to do. */ 1098 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 1099 * either clear the flags bit or point the event index at the next 1100 * entry. Always update the event index to keep code simple. */ 1101 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 1102 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 1103 if (!vq->event) 1104 vq->split.vring.avail->flags = 1105 cpu_to_virtio16(vq->vq.vdev, 1106 vq->split.avail_flags_shadow); 1107 } 1108 /* TODO: tune this threshold */ 1109 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 1110 1111 virtio_store_mb(vq->weak_barriers, 1112 &vring_used_event(&vq->split.vring), 1113 cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx + bufs)); 1114 1115 if (unlikely((u16)(virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx) 1116 - vq->last_used_idx) > bufs)) { 1117 END_USE(vq); 1118 return false; 1119 } 1120 1121 END_USE(vq); 1122 return true; 1123 } 1124 1125 static void *virtqueue_detach_unused_buf_split(struct vring_virtqueue *vq) 1126 { 1127 unsigned int i; 1128 void *buf; 1129 1130 START_USE(vq); 1131 1132 for (i = 0; i < vq->split.vring.num; i++) { 1133 if (!vq->split.desc_state[i].data) 1134 continue; 1135 /* detach_buf_split clears data, so grab it now. */ 1136 buf = vq->split.desc_state[i].data; 1137 if (virtqueue_is_in_order(vq)) 1138 detach_buf_split_in_order(vq, i, NULL); 1139 else 1140 detach_buf_split(vq, i, NULL); 1141 vq->split.avail_idx_shadow--; 1142 vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev, 1143 vq->split.avail_idx_shadow); 1144 END_USE(vq); 1145 return buf; 1146 } 1147 /* That should have freed everything. */ 1148 BUG_ON(vq->vq.num_free != vq->split.vring.num); 1149 1150 END_USE(vq); 1151 return NULL; 1152 } 1153 1154 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, 1155 struct vring_virtqueue *vq) 1156 { 1157 struct virtio_device *vdev; 1158 1159 vdev = vq->vq.vdev; 1160 1161 vring_split->avail_flags_shadow = 0; 1162 vring_split->avail_idx_shadow = 0; 1163 1164 /* No callback? Tell other side not to bother us. 
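	 * This is only a hint: the device may still send interrupts, which
	 * are simply ignored when the queue has no callback.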
*/ 1165 if (!vq->vq.callback) { 1166 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 1167 if (!vq->event) 1168 vring_split->vring.avail->flags = cpu_to_virtio16(vdev, 1169 vring_split->avail_flags_shadow); 1170 } 1171 } 1172 1173 static void virtqueue_reset_split(struct vring_virtqueue *vq) 1174 { 1175 int num; 1176 1177 num = vq->split.vring.num; 1178 1179 vq->split.vring.avail->flags = 0; 1180 vq->split.vring.avail->idx = 0; 1181 1182 /* reset avail event */ 1183 vq->split.vring.avail->ring[num] = 0; 1184 1185 vq->split.vring.used->flags = 0; 1186 vq->split.vring.used->idx = 0; 1187 1188 /* reset used event */ 1189 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; 1190 1191 virtqueue_init(vq, num); 1192 1193 virtqueue_vring_init_split(&vq->split, vq); 1194 } 1195 1196 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, 1197 struct vring_virtqueue_split *vring_split) 1198 { 1199 vq->split = *vring_split; 1200 1201 /* Put everything in free lists. */ 1202 vq->free_head = 0; 1203 vq->batch_last.id = UINT_MAX; 1204 } 1205 1206 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) 1207 { 1208 struct vring_desc_state_split *state; 1209 struct vring_desc_extra *extra; 1210 u32 num = vring_split->vring.num; 1211 1212 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); 1213 if (!state) 1214 goto err_state; 1215 1216 extra = vring_alloc_desc_extra(num); 1217 if (!extra) 1218 goto err_extra; 1219 1220 memset(state, 0, num * sizeof(struct vring_desc_state_split)); 1221 1222 vring_split->desc_state = state; 1223 vring_split->desc_extra = extra; 1224 return 0; 1225 1226 err_extra: 1227 kfree(state); 1228 err_state: 1229 return -ENOMEM; 1230 } 1231 1232 static void vring_free_split(struct vring_virtqueue_split *vring_split, 1233 struct virtio_device *vdev, 1234 union virtio_map map) 1235 { 1236 vring_free_queue(vdev, vring_split->queue_size_in_bytes, 1237 vring_split->vring.desc, 1238 vring_split->queue_dma_addr, 1239 map); 1240 1241 kfree(vring_split->desc_state); 1242 kfree(vring_split->desc_extra); 1243 } 1244 1245 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, 1246 struct virtio_device *vdev, 1247 u32 num, 1248 unsigned int vring_align, 1249 bool may_reduce_num, 1250 union virtio_map map) 1251 { 1252 void *queue = NULL; 1253 dma_addr_t dma_addr; 1254 1255 /* We assume num is a power of 2. */ 1256 if (!is_power_of_2(num)) { 1257 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1258 return -EINVAL; 1259 } 1260 1261 /* TODO: allocate each queue chunk individually */ 1262 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1263 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1264 &dma_addr, 1265 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1266 map); 1267 if (queue) 1268 break; 1269 if (!may_reduce_num) 1270 return -ENOMEM; 1271 } 1272 1273 if (!num) 1274 return -ENOMEM; 1275 1276 if (!queue) { 1277 /* Try to get a single page. You are my only hope! 
*/ 1278 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1279 &dma_addr, GFP_KERNEL | __GFP_ZERO, 1280 map); 1281 } 1282 if (!queue) 1283 return -ENOMEM; 1284 1285 vring_init(&vring_split->vring, num, queue, vring_align); 1286 1287 vring_split->queue_dma_addr = dma_addr; 1288 vring_split->queue_size_in_bytes = vring_size(num, vring_align); 1289 1290 vring_split->vring_align = vring_align; 1291 vring_split->may_reduce_num = may_reduce_num; 1292 1293 return 0; 1294 } 1295 1296 static const struct virtqueue_ops split_ops; 1297 1298 static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, 1299 struct vring_virtqueue_split *vring_split, 1300 struct virtio_device *vdev, 1301 bool weak_barriers, 1302 bool context, 1303 bool (*notify)(struct virtqueue *), 1304 void (*callback)(struct virtqueue *), 1305 const char *name, 1306 union virtio_map map) 1307 { 1308 struct vring_virtqueue *vq; 1309 int err; 1310 1311 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1312 if (!vq) 1313 return NULL; 1314 1315 vq->vq.callback = callback; 1316 vq->vq.vdev = vdev; 1317 vq->vq.name = name; 1318 vq->vq.index = index; 1319 vq->vq.reset = false; 1320 vq->we_own_ring = false; 1321 vq->notify = notify; 1322 vq->weak_barriers = weak_barriers; 1323 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 1324 vq->broken = true; 1325 #else 1326 vq->broken = false; 1327 #endif 1328 vq->map = map; 1329 vq->use_map_api = vring_use_map_api(vdev); 1330 1331 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1332 !context; 1333 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1334 vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ? 1335 VQ_LAYOUT_SPLIT_IN_ORDER : VQ_LAYOUT_SPLIT; 1336 1337 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1338 vq->weak_barriers = false; 1339 1340 err = vring_alloc_state_extra_split(vring_split); 1341 if (err) { 1342 kfree(vq); 1343 return NULL; 1344 } 1345 1346 virtqueue_vring_init_split(vring_split, vq); 1347 1348 virtqueue_init(vq, vring_split->vring.num); 1349 virtqueue_vring_attach_split(vq, vring_split); 1350 1351 spin_lock(&vdev->vqs_list_lock); 1352 list_add_tail(&vq->vq.list, &vdev->vqs); 1353 spin_unlock(&vdev->vqs_list_lock); 1354 return &vq->vq; 1355 } 1356 1357 static struct virtqueue *vring_create_virtqueue_split( 1358 unsigned int index, 1359 unsigned int num, 1360 unsigned int vring_align, 1361 struct virtio_device *vdev, 1362 bool weak_barriers, 1363 bool may_reduce_num, 1364 bool context, 1365 bool (*notify)(struct virtqueue *), 1366 void (*callback)(struct virtqueue *), 1367 const char *name, 1368 union virtio_map map) 1369 { 1370 struct vring_virtqueue_split vring_split = {}; 1371 struct virtqueue *vq; 1372 int err; 1373 1374 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, 1375 may_reduce_num, map); 1376 if (err) 1377 return NULL; 1378 1379 vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, 1380 context, notify, callback, name, map); 1381 if (!vq) { 1382 vring_free_split(&vring_split, vdev, map); 1383 return NULL; 1384 } 1385 1386 to_vvq(vq)->we_own_ring = true; 1387 1388 return vq; 1389 } 1390 1391 static int virtqueue_resize_split(struct vring_virtqueue *vq, u32 num) 1392 { 1393 struct vring_virtqueue_split vring_split = {}; 1394 struct virtio_device *vdev = vq->vq.vdev; 1395 int err; 1396 1397 err = vring_alloc_queue_split(&vring_split, vdev, num, 1398 vq->split.vring_align, 1399 vq->split.may_reduce_num, 1400 vq->map); 1401 if (err) 1402 goto err; 1403 1404 err = 
vring_alloc_state_extra_split(&vring_split); 1405 if (err) 1406 goto err_state_extra; 1407 1408 vring_free(&vq->vq); 1409 1410 virtqueue_vring_init_split(&vring_split, vq); 1411 1412 virtqueue_init(vq, vring_split.vring.num); 1413 virtqueue_vring_attach_split(vq, &vring_split); 1414 1415 return 0; 1416 1417 err_state_extra: 1418 vring_free_split(&vring_split, vdev, vq->map); 1419 err: 1420 virtqueue_reset_split(vq); 1421 return -ENOMEM; 1422 } 1423 1424 1425 /* 1426 * Packed ring specific functions - *_packed(). 1427 */ 1428 static bool packed_used_wrap_counter(u16 last_used_idx) 1429 { 1430 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1431 } 1432 1433 static u16 packed_last_used(u16 last_used_idx) 1434 { 1435 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1436 } 1437 1438 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1439 const struct vring_desc_extra *extra) 1440 { 1441 u16 flags; 1442 1443 flags = extra->flags; 1444 1445 if (flags & VRING_DESC_F_INDIRECT) { 1446 if (!vq->use_map_api) 1447 return; 1448 } else if (!vring_need_unmap_buffer(vq, extra)) 1449 return; 1450 1451 virtqueue_unmap_page_attrs(&vq->vq, 1452 extra->addr, extra->len, 1453 (flags & VRING_DESC_F_WRITE) ? 1454 DMA_FROM_DEVICE : DMA_TO_DEVICE, 1455 0); 1456 } 1457 1458 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1459 gfp_t gfp) 1460 { 1461 struct vring_desc_extra *extra; 1462 struct vring_packed_desc *desc; 1463 int i, size; 1464 1465 /* 1466 * We require lowmem mappings for the descriptors because 1467 * otherwise virt_to_phys will give us bogus addresses in the 1468 * virtqueue. 1469 */ 1470 gfp &= ~__GFP_HIGHMEM; 1471 1472 size = (sizeof(*desc) + sizeof(*extra)) * total_sg; 1473 1474 desc = kmalloc(size, gfp); 1475 if (!desc) 1476 return NULL; 1477 1478 extra = (struct vring_desc_extra *)&desc[total_sg]; 1479 1480 for (i = 0; i < total_sg; i++) 1481 extra[i].next = i + 1; 1482 1483 return desc; 1484 } 1485 1486 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1487 struct scatterlist *sgs[], 1488 unsigned int total_sg, 1489 unsigned int out_sgs, 1490 unsigned int in_sgs, 1491 void *data, 1492 bool premapped, 1493 gfp_t gfp, 1494 u16 id, 1495 unsigned long attr) 1496 { 1497 struct vring_desc_extra *extra; 1498 struct vring_packed_desc *desc; 1499 struct scatterlist *sg; 1500 unsigned int i, n, err_idx, len, total_in_len = 0; 1501 u16 head; 1502 dma_addr_t addr; 1503 1504 head = vq->packed.next_avail_idx; 1505 desc = alloc_indirect_packed(total_sg, gfp); 1506 if (!desc) 1507 return -ENOMEM; 1508 1509 extra = (struct vring_desc_extra *)&desc[total_sg]; 1510 1511 if (unlikely(vq->vq.num_free < 1)) { 1512 pr_debug("Can't add buf len 1 - avail = 0\n"); 1513 kfree(desc); 1514 END_USE(vq); 1515 return -ENOSPC; 1516 } 1517 1518 i = 0; 1519 1520 for (n = 0; n < out_sgs + in_sgs; n++) { 1521 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1522 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1523 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1524 &addr, &len, premapped, attr)) 1525 goto unmap_release; 1526 1527 desc[i].flags = cpu_to_le16(n < out_sgs ? 1528 0 : VRING_DESC_F_WRITE); 1529 desc[i].addr = cpu_to_le64(addr); 1530 desc[i].len = cpu_to_le32(len); 1531 1532 if (unlikely(vq->use_map_api)) { 1533 extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; 1534 extra[i].len = len; 1535 extra[i].flags = n < out_sgs ? 
0 : VRING_DESC_F_WRITE; 1536 } 1537 1538 if (n >= out_sgs) 1539 total_in_len += len; 1540 i++; 1541 } 1542 } 1543 1544 /* Now that the indirect table is filled in, map it. */ 1545 addr = vring_map_single(vq, desc, 1546 total_sg * sizeof(struct vring_packed_desc), 1547 DMA_TO_DEVICE); 1548 if (vring_mapping_error(vq, addr)) 1549 goto unmap_release; 1550 1551 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1552 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1553 sizeof(struct vring_packed_desc)); 1554 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1555 1556 if (vq->use_map_api) { 1557 vq->packed.desc_extra[id].addr = addr; 1558 vq->packed.desc_extra[id].len = total_sg * 1559 sizeof(struct vring_packed_desc); 1560 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1561 vq->packed.avail_used_flags; 1562 } 1563 1564 /* 1565 * A driver MUST NOT make the first descriptor in the list 1566 * available before all subsequent descriptors comprising 1567 * the list are made available. 1568 */ 1569 virtio_wmb(vq->weak_barriers); 1570 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1571 vq->packed.avail_used_flags); 1572 1573 /* We're using some buffers from the free list. */ 1574 vq->vq.num_free -= 1; 1575 1576 /* Update free pointer */ 1577 n = head + 1; 1578 if (n >= vq->packed.vring.num) { 1579 n = 0; 1580 vq->packed.avail_wrap_counter ^= 1; 1581 vq->packed.avail_used_flags ^= 1582 1 << VRING_PACKED_DESC_F_AVAIL | 1583 1 << VRING_PACKED_DESC_F_USED; 1584 } 1585 vq->packed.next_avail_idx = n; 1586 if (!virtqueue_is_in_order(vq)) 1587 vq->free_head = vq->packed.desc_extra[id].next; 1588 1589 /* Store token and indirect buffer state. */ 1590 vq->packed.desc_state[id].num = 1; 1591 vq->packed.desc_state[id].data = data; 1592 vq->packed.desc_state[id].indir_desc = desc; 1593 vq->packed.desc_state[id].last = id; 1594 vq->packed.desc_state[id].total_in_len = total_in_len; 1595 1596 vq->num_added += 1; 1597 1598 pr_debug("Added buffer head %i to %p\n", head, vq); 1599 END_USE(vq); 1600 1601 return 0; 1602 1603 unmap_release: 1604 err_idx = i; 1605 1606 for (i = 0; i < err_idx; i++) 1607 vring_unmap_extra_packed(vq, &extra[i]); 1608 1609 kfree(desc); 1610 1611 END_USE(vq); 1612 return -ENOMEM; 1613 } 1614 1615 static inline int virtqueue_add_packed(struct vring_virtqueue *vq, 1616 struct scatterlist *sgs[], 1617 unsigned int total_sg, 1618 unsigned int out_sgs, 1619 unsigned int in_sgs, 1620 void *data, 1621 void *ctx, 1622 bool premapped, 1623 gfp_t gfp, 1624 unsigned long attr) 1625 { 1626 struct vring_packed_desc *desc; 1627 struct scatterlist *sg; 1628 unsigned int i, n, c, descs_used, err_idx, len; 1629 __le16 head_flags, flags; 1630 u16 head, id, prev, curr, avail_used_flags; 1631 int err; 1632 1633 START_USE(vq); 1634 1635 BUG_ON(data == NULL); 1636 BUG_ON(ctx && vq->indirect); 1637 1638 if (unlikely(vq->broken)) { 1639 END_USE(vq); 1640 return -EIO; 1641 } 1642 1643 LAST_ADD_TIME_UPDATE(vq); 1644 1645 BUG_ON(total_sg == 0); 1646 1647 if (virtqueue_use_indirect(vq, total_sg)) { 1648 id = vq->free_head; 1649 BUG_ON(id == vq->packed.vring.num); 1650 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1651 in_sgs, data, premapped, gfp, 1652 id, attr); 1653 if (err != -ENOMEM) { 1654 END_USE(vq); 1655 return err; 1656 } 1657 1658 /* fall back on direct */ 1659 } 1660 1661 head = vq->packed.next_avail_idx; 1662 avail_used_flags = vq->packed.avail_used_flags; 1663 1664 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1665 1666 desc 
= vq->packed.vring.desc; 1667 i = head; 1668 descs_used = total_sg; 1669 1670 if (unlikely(vq->vq.num_free < descs_used)) { 1671 pr_debug("Can't add buf len %i - avail = %i\n", 1672 descs_used, vq->vq.num_free); 1673 END_USE(vq); 1674 return -ENOSPC; 1675 } 1676 1677 id = vq->free_head; 1678 BUG_ON(id == vq->packed.vring.num); 1679 1680 curr = id; 1681 c = 0; 1682 for (n = 0; n < out_sgs + in_sgs; n++) { 1683 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1684 dma_addr_t addr; 1685 1686 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1687 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1688 &addr, &len, premapped, attr)) 1689 goto unmap_release; 1690 1691 flags = cpu_to_le16(vq->packed.avail_used_flags | 1692 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1693 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1694 if (i == head) 1695 head_flags = flags; 1696 else 1697 desc[i].flags = flags; 1698 1699 desc[i].addr = cpu_to_le64(addr); 1700 desc[i].len = cpu_to_le32(len); 1701 desc[i].id = cpu_to_le16(id); 1702 1703 if (unlikely(vq->use_map_api)) { 1704 vq->packed.desc_extra[curr].addr = premapped ? 1705 DMA_MAPPING_ERROR : addr; 1706 vq->packed.desc_extra[curr].len = len; 1707 vq->packed.desc_extra[curr].flags = 1708 le16_to_cpu(flags); 1709 } 1710 prev = curr; 1711 curr = vq->packed.desc_extra[curr].next; 1712 1713 if ((unlikely(++i >= vq->packed.vring.num))) { 1714 i = 0; 1715 vq->packed.avail_used_flags ^= 1716 1 << VRING_PACKED_DESC_F_AVAIL | 1717 1 << VRING_PACKED_DESC_F_USED; 1718 } 1719 } 1720 } 1721 1722 if (i <= head) 1723 vq->packed.avail_wrap_counter ^= 1; 1724 1725 /* We're using some buffers from the free list. */ 1726 vq->vq.num_free -= descs_used; 1727 1728 /* Update free pointer */ 1729 vq->packed.next_avail_idx = i; 1730 vq->free_head = curr; 1731 1732 /* Store token. */ 1733 vq->packed.desc_state[id].num = descs_used; 1734 vq->packed.desc_state[id].data = data; 1735 vq->packed.desc_state[id].indir_desc = ctx; 1736 vq->packed.desc_state[id].last = prev; 1737 1738 /* 1739 * A driver MUST NOT make the first descriptor in the list 1740 * available before all subsequent descriptors comprising 1741 * the list are made available. 
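	 * That is why head_flags is written only below, after the
	 * virtio_wmb(), while every other descriptor was filled in by the
	 * loop above.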
1742 */ 1743 virtio_wmb(vq->weak_barriers); 1744 vq->packed.vring.desc[head].flags = head_flags; 1745 vq->num_added += descs_used; 1746 1747 pr_debug("Added buffer head %i to %p\n", head, vq); 1748 END_USE(vq); 1749 1750 return 0; 1751 1752 unmap_release: 1753 err_idx = i; 1754 i = head; 1755 curr = vq->free_head; 1756 1757 vq->packed.avail_used_flags = avail_used_flags; 1758 1759 for (n = 0; n < total_sg; n++) { 1760 if (i == err_idx) 1761 break; 1762 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1763 curr = vq->packed.desc_extra[curr].next; 1764 i++; 1765 if (i >= vq->packed.vring.num) 1766 i = 0; 1767 } 1768 1769 END_USE(vq); 1770 return -EIO; 1771 } 1772 1773 static inline int virtqueue_add_packed_in_order(struct vring_virtqueue *vq, 1774 struct scatterlist *sgs[], 1775 unsigned int total_sg, 1776 unsigned int out_sgs, 1777 unsigned int in_sgs, 1778 void *data, 1779 void *ctx, 1780 bool premapped, 1781 gfp_t gfp, 1782 unsigned long attr) 1783 { 1784 struct vring_packed_desc *desc; 1785 struct scatterlist *sg; 1786 unsigned int i, n, sg_count, err_idx, total_in_len = 0; 1787 __le16 head_flags, flags; 1788 u16 head, avail_used_flags; 1789 bool avail_wrap_counter; 1790 int err; 1791 1792 START_USE(vq); 1793 1794 BUG_ON(data == NULL); 1795 BUG_ON(ctx && vq->indirect); 1796 1797 if (unlikely(vq->broken)) { 1798 END_USE(vq); 1799 return -EIO; 1800 } 1801 1802 LAST_ADD_TIME_UPDATE(vq); 1803 1804 BUG_ON(total_sg == 0); 1805 1806 if (virtqueue_use_indirect(vq, total_sg)) { 1807 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1808 in_sgs, data, premapped, gfp, 1809 vq->packed.next_avail_idx, 1810 attr); 1811 if (err != -ENOMEM) { 1812 END_USE(vq); 1813 return err; 1814 } 1815 1816 /* fall back on direct */ 1817 } 1818 1819 head = vq->packed.next_avail_idx; 1820 avail_used_flags = vq->packed.avail_used_flags; 1821 avail_wrap_counter = vq->packed.avail_wrap_counter; 1822 1823 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1824 1825 desc = vq->packed.vring.desc; 1826 i = head; 1827 1828 if (unlikely(vq->vq.num_free < total_sg)) { 1829 pr_debug("Can't add buf len %i - avail = %i\n", 1830 total_sg, vq->vq.num_free); 1831 END_USE(vq); 1832 return -ENOSPC; 1833 } 1834 1835 sg_count = 0; 1836 for (n = 0; n < out_sgs + in_sgs; n++) { 1837 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1838 dma_addr_t addr; 1839 u32 len; 1840 1841 flags = 0; 1842 if (++sg_count != total_sg) 1843 flags |= cpu_to_le16(VRING_DESC_F_NEXT); 1844 if (n >= out_sgs) 1845 flags |= cpu_to_le16(VRING_DESC_F_WRITE); 1846 1847 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1848 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1849 &addr, &len, premapped, attr)) 1850 goto unmap_release; 1851 1852 flags |= cpu_to_le16(vq->packed.avail_used_flags); 1853 1854 if (i == head) 1855 head_flags = flags; 1856 else 1857 desc[i].flags = flags; 1858 1859 desc[i].addr = cpu_to_le64(addr); 1860 desc[i].len = cpu_to_le32(len); 1861 desc[i].id = cpu_to_le16(head); 1862 1863 if (unlikely(vq->use_map_api)) { 1864 vq->packed.desc_extra[i].addr = premapped ? 
1865 DMA_MAPPING_ERROR : addr; 1866 vq->packed.desc_extra[i].len = len; 1867 vq->packed.desc_extra[i].flags = 1868 le16_to_cpu(flags); 1869 } 1870 1871 if ((unlikely(++i >= vq->packed.vring.num))) { 1872 i = 0; 1873 vq->packed.avail_used_flags ^= 1874 1 << VRING_PACKED_DESC_F_AVAIL | 1875 1 << VRING_PACKED_DESC_F_USED; 1876 vq->packed.avail_wrap_counter ^= 1; 1877 } 1878 1879 if (n >= out_sgs) 1880 total_in_len += len; 1881 } 1882 } 1883 1884 /* We're using some buffers from the free list. */ 1885 vq->vq.num_free -= total_sg; 1886 1887 /* Update free pointer */ 1888 vq->packed.next_avail_idx = i; 1889 1890 /* Store token. */ 1891 vq->packed.desc_state[head].num = total_sg; 1892 vq->packed.desc_state[head].data = data; 1893 vq->packed.desc_state[head].indir_desc = ctx; 1894 vq->packed.desc_state[head].total_in_len = total_in_len; 1895 1896 /* 1897 * A driver MUST NOT make the first descriptor in the list 1898 * available before all subsequent descriptors comprising 1899 * the list are made available. 1900 */ 1901 virtio_wmb(vq->weak_barriers); 1902 vq->packed.vring.desc[head].flags = head_flags; 1903 vq->num_added += total_sg; 1904 1905 pr_debug("Added buffer head %i to %p\n", head, vq); 1906 END_USE(vq); 1907 1908 return 0; 1909 1910 unmap_release: 1911 err_idx = i; 1912 i = head; 1913 vq->packed.avail_used_flags = avail_used_flags; 1914 vq->packed.avail_wrap_counter = avail_wrap_counter; 1915 1916 for (n = 0; n < total_sg; n++) { 1917 if (i == err_idx) 1918 break; 1919 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[i]); 1920 i++; 1921 if (i >= vq->packed.vring.num) 1922 i = 0; 1923 } 1924 1925 END_USE(vq); 1926 return -EIO; 1927 } 1928 1929 static bool virtqueue_kick_prepare_packed(struct vring_virtqueue *vq) 1930 { 1931 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1932 bool needs_kick; 1933 union { 1934 struct { 1935 __le16 off_wrap; 1936 __le16 flags; 1937 }; 1938 u32 u32; 1939 } snapshot; 1940 1941 START_USE(vq); 1942 1943 /* 1944 * We need to expose the new flags value before checking notification 1945 * suppressions. 1946 */ 1947 virtio_mb(vq->weak_barriers); 1948 1949 old = vq->packed.next_avail_idx - vq->num_added; 1950 new = vq->packed.next_avail_idx; 1951 vq->num_added = 0; 1952 1953 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1954 flags = le16_to_cpu(snapshot.flags); 1955 1956 LAST_ADD_TIME_CHECK(vq); 1957 LAST_ADD_TIME_INVALID(vq); 1958 1959 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1960 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1961 goto out; 1962 } 1963 1964 off_wrap = le16_to_cpu(snapshot.off_wrap); 1965 1966 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1967 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1968 if (wrap_counter != vq->packed.avail_wrap_counter) 1969 event_idx -= vq->packed.vring.num; 1970 1971 needs_kick = vring_need_event(event_idx, new, old); 1972 out: 1973 END_USE(vq); 1974 return needs_kick; 1975 } 1976 1977 static void detach_buf_packed_in_order(struct vring_virtqueue *vq, 1978 unsigned int id, void **ctx) 1979 { 1980 struct vring_desc_state_packed *state = NULL; 1981 struct vring_packed_desc *desc; 1982 unsigned int i, curr; 1983 1984 state = &vq->packed.desc_state[id]; 1985 1986 /* Clear data ptr. 
*/ 1987 state->data = NULL; 1988 1989 vq->vq.num_free += state->num; 1990 1991 if (unlikely(vq->use_map_api)) { 1992 curr = id; 1993 for (i = 0; i < state->num; i++) { 1994 vring_unmap_extra_packed(vq, 1995 &vq->packed.desc_extra[curr]); 1996 curr = vq->packed.desc_extra[curr].next; 1997 } 1998 } 1999 2000 if (vq->indirect) { 2001 struct vring_desc_extra *extra; 2002 u32 len, num; 2003 2004 /* Free the indirect table, if any, now that it's unmapped. */ 2005 desc = state->indir_desc; 2006 if (!desc) 2007 return; 2008 2009 if (vq->use_map_api) { 2010 len = vq->packed.desc_extra[id].len; 2011 num = len / sizeof(struct vring_packed_desc); 2012 2013 extra = (struct vring_desc_extra *)&desc[num]; 2014 2015 for (i = 0; i < num; i++) 2016 vring_unmap_extra_packed(vq, &extra[i]); 2017 } 2018 kfree(desc); 2019 state->indir_desc = NULL; 2020 } else if (ctx) { 2021 *ctx = state->indir_desc; 2022 } 2023 } 2024 2025 static void detach_buf_packed(struct vring_virtqueue *vq, 2026 unsigned int id, void **ctx) 2027 { 2028 struct vring_desc_state_packed *state = &vq->packed.desc_state[id]; 2029 2030 vq->packed.desc_extra[state->last].next = vq->free_head; 2031 vq->free_head = id; 2032 2033 detach_buf_packed_in_order(vq, id, ctx); 2034 } 2035 2036 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 2037 u16 idx, bool used_wrap_counter) 2038 { 2039 bool avail, used; 2040 u16 flags; 2041 2042 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 2043 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 2044 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 2045 2046 return avail == used && used == used_wrap_counter; 2047 } 2048 2049 static bool virtqueue_poll_packed(const struct vring_virtqueue *vq, 2050 unsigned int off_wrap) 2051 { 2052 bool wrap_counter; 2053 u16 used_idx; 2054 2055 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 2056 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 2057 2058 return is_used_desc_packed(vq, used_idx, wrap_counter); 2059 } 2060 2061 static bool more_used_packed(const struct vring_virtqueue *vq) 2062 { 2063 return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx)); 2064 } 2065 2066 static void update_last_used_idx_packed(struct vring_virtqueue *vq, 2067 u16 id, u16 last_used, 2068 u16 used_wrap_counter) 2069 { 2070 last_used += vq->packed.desc_state[id].num; 2071 if (unlikely(last_used >= vq->packed.vring.num)) { 2072 last_used -= vq->packed.vring.num; 2073 used_wrap_counter ^= 1; 2074 } 2075 2076 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 2077 WRITE_ONCE(vq->last_used_idx, last_used); 2078 2079 /* 2080 * If we expect an interrupt for the next entry, tell host 2081 * by writing event index and flush out the write before 2082 * the read in the next get_buf call. 
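	 * For the packed ring the stored value also carries the used wrap
	 * counter in bit VRING_PACKED_EVENT_F_WRAP_CTR, hence "off_wrap".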
2083 */ 2084 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 2085 virtio_store_mb(vq->weak_barriers, 2086 &vq->packed.vring.driver->off_wrap, 2087 cpu_to_le16(vq->last_used_idx)); 2088 } 2089 2090 static bool more_used_packed_in_order(const struct vring_virtqueue *vq) 2091 { 2092 if (vq->batch_last.id != UINT_MAX) 2093 return true; 2094 2095 return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx)); 2096 } 2097 2098 static void *virtqueue_get_buf_ctx_packed_in_order(struct vring_virtqueue *vq, 2099 unsigned int *len, 2100 void **ctx) 2101 { 2102 unsigned int num = vq->packed.vring.num; 2103 u16 last_used, last_used_idx; 2104 bool used_wrap_counter; 2105 void *ret; 2106 2107 START_USE(vq); 2108 2109 if (unlikely(vq->broken)) { 2110 END_USE(vq); 2111 return NULL; 2112 } 2113 2114 last_used_idx = vq->last_used_idx; 2115 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 2116 last_used = packed_last_used(last_used_idx); 2117 2118 if (vq->batch_last.id == UINT_MAX) { 2119 if (!more_used_packed_in_order(vq)) { 2120 pr_debug("No more buffers in queue\n"); 2121 END_USE(vq); 2122 return NULL; 2123 } 2124 /* Only get used elements after they have been exposed by host. */ 2125 virtio_rmb(vq->weak_barriers); 2126 vq->batch_last.id = 2127 le16_to_cpu(vq->packed.vring.desc[last_used].id); 2128 vq->batch_last.len = 2129 le32_to_cpu(vq->packed.vring.desc[last_used].len); 2130 } 2131 2132 if (vq->batch_last.id == last_used) { 2133 vq->batch_last.id = UINT_MAX; 2134 *len = vq->batch_last.len; 2135 } else { 2136 *len = vq->packed.desc_state[last_used].total_in_len; 2137 } 2138 2139 if (unlikely(last_used >= num)) { 2140 BAD_RING(vq, "id %u out of range\n", last_used); 2141 return NULL; 2142 } 2143 if (unlikely(!vq->packed.desc_state[last_used].data)) { 2144 BAD_RING(vq, "id %u is not a head!\n", last_used); 2145 return NULL; 2146 } 2147 2148 /* detach_buf_packed clears data, so grab it now. */ 2149 ret = vq->packed.desc_state[last_used].data; 2150 detach_buf_packed_in_order(vq, last_used, ctx); 2151 2152 update_last_used_idx_packed(vq, last_used, last_used, 2153 used_wrap_counter); 2154 2155 LAST_ADD_TIME_INVALID(vq); 2156 2157 END_USE(vq); 2158 return ret; 2159 } 2160 2161 static void *virtqueue_get_buf_ctx_packed(struct vring_virtqueue *vq, 2162 unsigned int *len, 2163 void **ctx) 2164 { 2165 unsigned int num = vq->packed.vring.num; 2166 u16 last_used, id, last_used_idx; 2167 bool used_wrap_counter; 2168 void *ret; 2169 2170 START_USE(vq); 2171 2172 if (unlikely(vq->broken)) { 2173 END_USE(vq); 2174 return NULL; 2175 } 2176 2177 if (!more_used_packed(vq)) { 2178 pr_debug("No more buffers in queue\n"); 2179 END_USE(vq); 2180 return NULL; 2181 } 2182 2183 /* Only get used elements after they have been exposed by host. */ 2184 virtio_rmb(vq->weak_barriers); 2185 2186 last_used_idx = READ_ONCE(vq->last_used_idx); 2187 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 2188 last_used = packed_last_used(last_used_idx); 2189 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 2190 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 2191 2192 if (unlikely(id >= num)) { 2193 BAD_RING(vq, "id %u out of range\n", id); 2194 return NULL; 2195 } 2196 if (unlikely(!vq->packed.desc_state[id].data)) { 2197 BAD_RING(vq, "id %u is not a head!\n", id); 2198 return NULL; 2199 } 2200 2201 /* detach_buf_packed clears data, so grab it now. 
*/ 2202 ret = vq->packed.desc_state[id].data; 2203 detach_buf_packed(vq, id, ctx); 2204 2205 update_last_used_idx_packed(vq, id, last_used, used_wrap_counter); 2206 2207 LAST_ADD_TIME_INVALID(vq); 2208 2209 END_USE(vq); 2210 return ret; 2211 } 2212 2213 static void virtqueue_disable_cb_packed(struct vring_virtqueue *vq) 2214 { 2215 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 2216 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 2217 2218 /* 2219 * If device triggered an event already it won't trigger one again: 2220 * no need to disable. 2221 */ 2222 if (vq->event_triggered) 2223 return; 2224 2225 vq->packed.vring.driver->flags = 2226 cpu_to_le16(vq->packed.event_flags_shadow); 2227 } 2228 } 2229 2230 static unsigned int virtqueue_enable_cb_prepare_packed(struct vring_virtqueue *vq) 2231 { 2232 START_USE(vq); 2233 2234 /* 2235 * We optimistically turn back on interrupts, then check if there was 2236 * more to do. 2237 */ 2238 2239 if (vq->event) { 2240 vq->packed.vring.driver->off_wrap = 2241 cpu_to_le16(vq->last_used_idx); 2242 /* 2243 * We need to update event offset and event wrap 2244 * counter first before updating event flags. 2245 */ 2246 virtio_wmb(vq->weak_barriers); 2247 } 2248 2249 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 2250 vq->packed.event_flags_shadow = vq->event ? 2251 VRING_PACKED_EVENT_FLAG_DESC : 2252 VRING_PACKED_EVENT_FLAG_ENABLE; 2253 vq->packed.vring.driver->flags = 2254 cpu_to_le16(vq->packed.event_flags_shadow); 2255 } 2256 2257 END_USE(vq); 2258 return vq->last_used_idx; 2259 } 2260 2261 static bool virtqueue_enable_cb_delayed_packed(struct vring_virtqueue *vq) 2262 { 2263 u16 used_idx, wrap_counter, last_used_idx; 2264 u16 bufs; 2265 2266 START_USE(vq); 2267 2268 /* 2269 * We optimistically turn back on interrupts, then check if there was 2270 * more to do. 2271 */ 2272 2273 if (vq->event) { 2274 /* TODO: tune this threshold */ 2275 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 2276 last_used_idx = READ_ONCE(vq->last_used_idx); 2277 wrap_counter = packed_used_wrap_counter(last_used_idx); 2278 2279 used_idx = packed_last_used(last_used_idx) + bufs; 2280 if (used_idx >= vq->packed.vring.num) { 2281 used_idx -= vq->packed.vring.num; 2282 wrap_counter ^= 1; 2283 } 2284 2285 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 2286 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 2287 2288 /* 2289 * We need to update event offset and event wrap 2290 * counter first before updating event flags. 2291 */ 2292 virtio_wmb(vq->weak_barriers); 2293 } 2294 2295 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 2296 vq->packed.event_flags_shadow = vq->event ? 2297 VRING_PACKED_EVENT_FLAG_DESC : 2298 VRING_PACKED_EVENT_FLAG_ENABLE; 2299 vq->packed.vring.driver->flags = 2300 cpu_to_le16(vq->packed.event_flags_shadow); 2301 } 2302 2303 /* 2304 * We need to update event suppression structure first 2305 * before re-checking for more used buffers. 
2306 */ 2307 virtio_mb(vq->weak_barriers); 2308 2309 last_used_idx = READ_ONCE(vq->last_used_idx); 2310 wrap_counter = packed_used_wrap_counter(last_used_idx); 2311 used_idx = packed_last_used(last_used_idx); 2312 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 2313 END_USE(vq); 2314 return false; 2315 } 2316 2317 END_USE(vq); 2318 return true; 2319 } 2320 2321 static void *virtqueue_detach_unused_buf_packed(struct vring_virtqueue *vq) 2322 { 2323 unsigned int i; 2324 void *buf; 2325 2326 START_USE(vq); 2327 2328 for (i = 0; i < vq->packed.vring.num; i++) { 2329 if (!vq->packed.desc_state[i].data) 2330 continue; 2331 /* detach_buf clears data, so grab it now. */ 2332 buf = vq->packed.desc_state[i].data; 2333 if (virtqueue_is_in_order(vq)) 2334 detach_buf_packed_in_order(vq, i, NULL); 2335 else 2336 detach_buf_packed(vq, i, NULL); 2337 END_USE(vq); 2338 return buf; 2339 } 2340 /* That should have freed everything. */ 2341 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 2342 2343 END_USE(vq); 2344 return NULL; 2345 } 2346 2347 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) 2348 { 2349 struct vring_desc_extra *desc_extra; 2350 unsigned int i; 2351 2352 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 2353 GFP_KERNEL); 2354 if (!desc_extra) 2355 return NULL; 2356 2357 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 2358 2359 for (i = 0; i < num - 1; i++) 2360 desc_extra[i].next = i + 1; 2361 2362 desc_extra[num - 1].next = 0; 2363 2364 return desc_extra; 2365 } 2366 2367 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, 2368 struct virtio_device *vdev, 2369 union virtio_map map) 2370 { 2371 if (vring_packed->vring.desc) 2372 vring_free_queue(vdev, vring_packed->ring_size_in_bytes, 2373 vring_packed->vring.desc, 2374 vring_packed->ring_dma_addr, 2375 map); 2376 2377 if (vring_packed->vring.driver) 2378 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 2379 vring_packed->vring.driver, 2380 vring_packed->driver_event_dma_addr, 2381 map); 2382 2383 if (vring_packed->vring.device) 2384 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 2385 vring_packed->vring.device, 2386 vring_packed->device_event_dma_addr, 2387 map); 2388 2389 kfree(vring_packed->desc_state); 2390 kfree(vring_packed->desc_extra); 2391 } 2392 2393 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, 2394 struct virtio_device *vdev, 2395 u32 num, union virtio_map map) 2396 { 2397 struct vring_packed_desc *ring; 2398 struct vring_packed_desc_event *driver, *device; 2399 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 2400 size_t ring_size_in_bytes, event_size_in_bytes; 2401 2402 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 2403 2404 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 2405 &ring_dma_addr, 2406 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2407 map); 2408 if (!ring) 2409 goto err; 2410 2411 vring_packed->vring.desc = ring; 2412 vring_packed->ring_dma_addr = ring_dma_addr; 2413 vring_packed->ring_size_in_bytes = ring_size_in_bytes; 2414 2415 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 2416 2417 driver = vring_alloc_queue(vdev, event_size_in_bytes, 2418 &driver_event_dma_addr, 2419 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2420 map); 2421 if (!driver) 2422 goto err; 2423 2424 vring_packed->vring.driver = driver; 2425 vring_packed->event_size_in_bytes = event_size_in_bytes; 2426 vring_packed->driver_event_dma_addr = driver_event_dma_addr; 
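	/*
	 * Per the packed ring layout, the driver event suppression area is
	 * written by the driver and read by the device; the device area
	 * allocated below is the mirror image (written by the device, read
	 * by the driver).
	 */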
2427 2428 device = vring_alloc_queue(vdev, event_size_in_bytes, 2429 &device_event_dma_addr, 2430 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2431 map); 2432 if (!device) 2433 goto err; 2434 2435 vring_packed->vring.device = device; 2436 vring_packed->device_event_dma_addr = device_event_dma_addr; 2437 2438 vring_packed->vring.num = num; 2439 2440 return 0; 2441 2442 err: 2443 vring_free_packed(vring_packed, vdev, map); 2444 return -ENOMEM; 2445 } 2446 2447 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) 2448 { 2449 struct vring_desc_state_packed *state; 2450 struct vring_desc_extra *extra; 2451 u32 num = vring_packed->vring.num; 2452 2453 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); 2454 if (!state) 2455 goto err_desc_state; 2456 2457 memset(state, 0, num * sizeof(struct vring_desc_state_packed)); 2458 2459 extra = vring_alloc_desc_extra(num); 2460 if (!extra) 2461 goto err_desc_extra; 2462 2463 vring_packed->desc_state = state; 2464 vring_packed->desc_extra = extra; 2465 2466 return 0; 2467 2468 err_desc_extra: 2469 kfree(state); 2470 err_desc_state: 2471 return -ENOMEM; 2472 } 2473 2474 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, 2475 bool callback) 2476 { 2477 vring_packed->next_avail_idx = 0; 2478 vring_packed->avail_wrap_counter = 1; 2479 vring_packed->event_flags_shadow = 0; 2480 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 2481 2482 /* No callback? Tell other side not to bother us. */ 2483 if (!callback) { 2484 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 2485 vring_packed->vring.driver->flags = 2486 cpu_to_le16(vring_packed->event_flags_shadow); 2487 } 2488 } 2489 2490 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, 2491 struct vring_virtqueue_packed *vring_packed) 2492 { 2493 vq->packed = *vring_packed; 2494 2495 if (virtqueue_is_in_order(vq)) { 2496 vq->batch_last.id = UINT_MAX; 2497 } else { 2498 /* 2499 * Put everything in free lists. Note that 2500 * next_avail_idx is sufficient with IN_ORDER so 2501 * free_head is unused. 2502 */ 2503 vq->free_head = 0; 2504 } 2505 } 2506 static void virtqueue_reset_packed(struct vring_virtqueue *vq) 2507 { 2508 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); 2509 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); 2510 2511 /* we need to reset the desc.flags. 
For more, see is_used_desc_packed() */ 2512 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); 2513 virtqueue_init(vq, vq->packed.vring.num); 2514 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); 2515 } 2516 2517 static const struct virtqueue_ops packed_ops; 2518 2519 static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, 2520 struct vring_virtqueue_packed *vring_packed, 2521 struct virtio_device *vdev, 2522 bool weak_barriers, 2523 bool context, 2524 bool (*notify)(struct virtqueue *), 2525 void (*callback)(struct virtqueue *), 2526 const char *name, 2527 union virtio_map map) 2528 { 2529 struct vring_virtqueue *vq; 2530 int err; 2531 2532 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2533 if (!vq) 2534 return NULL; 2535 2536 vq->vq.callback = callback; 2537 vq->vq.vdev = vdev; 2538 vq->vq.name = name; 2539 vq->vq.index = index; 2540 vq->vq.reset = false; 2541 vq->we_own_ring = false; 2542 vq->notify = notify; 2543 vq->weak_barriers = weak_barriers; 2544 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2545 vq->broken = true; 2546 #else 2547 vq->broken = false; 2548 #endif 2549 vq->map = map; 2550 vq->use_map_api = vring_use_map_api(vdev); 2551 2552 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2553 !context; 2554 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2555 vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ? 2556 VQ_LAYOUT_PACKED_IN_ORDER : VQ_LAYOUT_PACKED; 2557 2558 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2559 vq->weak_barriers = false; 2560 2561 err = vring_alloc_state_extra_packed(vring_packed); 2562 if (err) { 2563 kfree(vq); 2564 return NULL; 2565 } 2566 2567 virtqueue_vring_init_packed(vring_packed, !!callback); 2568 2569 virtqueue_init(vq, vring_packed->vring.num); 2570 virtqueue_vring_attach_packed(vq, vring_packed); 2571 2572 spin_lock(&vdev->vqs_list_lock); 2573 list_add_tail(&vq->vq.list, &vdev->vqs); 2574 spin_unlock(&vdev->vqs_list_lock); 2575 return &vq->vq; 2576 } 2577 2578 static struct virtqueue *vring_create_virtqueue_packed( 2579 unsigned int index, 2580 unsigned int num, 2581 unsigned int vring_align, 2582 struct virtio_device *vdev, 2583 bool weak_barriers, 2584 bool may_reduce_num, 2585 bool context, 2586 bool (*notify)(struct virtqueue *), 2587 void (*callback)(struct virtqueue *), 2588 const char *name, 2589 union virtio_map map) 2590 { 2591 struct vring_virtqueue_packed vring_packed = {}; 2592 struct virtqueue *vq; 2593 2594 if (vring_alloc_queue_packed(&vring_packed, vdev, num, map)) 2595 return NULL; 2596 2597 vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, 2598 context, notify, callback, name, map); 2599 if (!vq) { 2600 vring_free_packed(&vring_packed, vdev, map); 2601 return NULL; 2602 } 2603 2604 to_vvq(vq)->we_own_ring = true; 2605 2606 return vq; 2607 } 2608 2609 static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num) 2610 { 2611 struct vring_virtqueue_packed vring_packed = {}; 2612 struct virtio_device *vdev = vq->vq.vdev; 2613 int err; 2614 2615 if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map)) 2616 goto err_ring; 2617 2618 err = vring_alloc_state_extra_packed(&vring_packed); 2619 if (err) 2620 goto err_state_extra; 2621 2622 vring_free(&vq->vq); 2623 2624 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); 2625 2626 virtqueue_init(vq, vring_packed.vring.num); 2627 virtqueue_vring_attach_packed(vq, &vring_packed); 2628 2629 return 0; 2630 2631 err_state_extra: 2632 
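	/*
	 * Allocation for the new ring failed: free whatever was set up for it
	 * and re-initialize the current ring so the vq stays usable (the
	 * -ENOMEM case documented at virtqueue_resize()).
	 */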
vring_free_packed(&vring_packed, vdev, vq->map); 2633 err_ring: 2634 virtqueue_reset_packed(vq); 2635 return -ENOMEM; 2636 } 2637 2638 static const struct virtqueue_ops split_ops = { 2639 .add = virtqueue_add_split, 2640 .get = virtqueue_get_buf_ctx_split, 2641 .kick_prepare = virtqueue_kick_prepare_split, 2642 .disable_cb = virtqueue_disable_cb_split, 2643 .enable_cb_delayed = virtqueue_enable_cb_delayed_split, 2644 .enable_cb_prepare = virtqueue_enable_cb_prepare_split, 2645 .poll = virtqueue_poll_split, 2646 .detach_unused_buf = virtqueue_detach_unused_buf_split, 2647 .more_used = more_used_split, 2648 .resize = virtqueue_resize_split, 2649 .reset = virtqueue_reset_split, 2650 }; 2651 2652 static const struct virtqueue_ops packed_ops = { 2653 .add = virtqueue_add_packed, 2654 .get = virtqueue_get_buf_ctx_packed, 2655 .kick_prepare = virtqueue_kick_prepare_packed, 2656 .disable_cb = virtqueue_disable_cb_packed, 2657 .enable_cb_delayed = virtqueue_enable_cb_delayed_packed, 2658 .enable_cb_prepare = virtqueue_enable_cb_prepare_packed, 2659 .poll = virtqueue_poll_packed, 2660 .detach_unused_buf = virtqueue_detach_unused_buf_packed, 2661 .more_used = more_used_packed, 2662 .resize = virtqueue_resize_packed, 2663 .reset = virtqueue_reset_packed, 2664 }; 2665 2666 static const struct virtqueue_ops split_in_order_ops = { 2667 .add = virtqueue_add_split, 2668 .get = virtqueue_get_buf_ctx_split_in_order, 2669 .kick_prepare = virtqueue_kick_prepare_split, 2670 .disable_cb = virtqueue_disable_cb_split, 2671 .enable_cb_delayed = virtqueue_enable_cb_delayed_split, 2672 .enable_cb_prepare = virtqueue_enable_cb_prepare_split, 2673 .poll = virtqueue_poll_split, 2674 .detach_unused_buf = virtqueue_detach_unused_buf_split, 2675 .more_used = more_used_split_in_order, 2676 .resize = virtqueue_resize_split, 2677 .reset = virtqueue_reset_split, 2678 }; 2679 2680 static const struct virtqueue_ops packed_in_order_ops = { 2681 .add = virtqueue_add_packed_in_order, 2682 .get = virtqueue_get_buf_ctx_packed_in_order, 2683 .kick_prepare = virtqueue_kick_prepare_packed, 2684 .disable_cb = virtqueue_disable_cb_packed, 2685 .enable_cb_delayed = virtqueue_enable_cb_delayed_packed, 2686 .enable_cb_prepare = virtqueue_enable_cb_prepare_packed, 2687 .poll = virtqueue_poll_packed, 2688 .detach_unused_buf = virtqueue_detach_unused_buf_packed, 2689 .more_used = more_used_packed_in_order, 2690 .resize = virtqueue_resize_packed, 2691 .reset = virtqueue_reset_packed, 2692 }; 2693 2694 static int virtqueue_disable_and_recycle(struct virtqueue *_vq, 2695 void (*recycle)(struct virtqueue *vq, void *buf)) 2696 { 2697 struct vring_virtqueue *vq = to_vvq(_vq); 2698 struct virtio_device *vdev = vq->vq.vdev; 2699 void *buf; 2700 int err; 2701 2702 if (!vq->we_own_ring) 2703 return -EPERM; 2704 2705 if (!vdev->config->disable_vq_and_reset) 2706 return -ENOENT; 2707 2708 if (!vdev->config->enable_vq_after_reset) 2709 return -ENOENT; 2710 2711 err = vdev->config->disable_vq_and_reset(_vq); 2712 if (err) 2713 return err; 2714 2715 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) 2716 recycle(_vq, buf); 2717 2718 return 0; 2719 } 2720 2721 static int virtqueue_enable_after_reset(struct virtqueue *_vq) 2722 { 2723 struct vring_virtqueue *vq = to_vvq(_vq); 2724 struct virtio_device *vdev = vq->vq.vdev; 2725 2726 if (vdev->config->enable_vq_after_reset(_vq)) 2727 return -EBUSY; 2728 2729 return 0; 2730 } 2731 2732 /* 2733 * Generic functions and exported symbols. 2734 */ 2735 2736 #define VIRTQUEUE_CALL(vq, op, ...) 
\ 2737 ({ \ 2738 typeof(vq) __VIRTQUEUE_CALL_vq = (vq); \ 2739 typeof(split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__)) ret; \ 2740 \ 2741 switch (__VIRTQUEUE_CALL_vq->layout) { \ 2742 case VQ_LAYOUT_SPLIT: \ 2743 ret = split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \ 2744 break; \ 2745 case VQ_LAYOUT_PACKED: \ 2746 ret = packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);\ 2747 break; \ 2748 case VQ_LAYOUT_SPLIT_IN_ORDER: \ 2749 ret = split_in_order_ops.op(vq, ##__VA_ARGS__); \ 2750 break; \ 2751 case VQ_LAYOUT_PACKED_IN_ORDER: \ 2752 ret = packed_in_order_ops.op(vq, ##__VA_ARGS__); \ 2753 break; \ 2754 default: \ 2755 BUG(); \ 2756 break; \ 2757 } \ 2758 ret; \ 2759 }) 2760 2761 #define VOID_VIRTQUEUE_CALL(vq, op, ...) \ 2762 ({ \ 2763 typeof(vq) __VIRTQUEUE_CALL_vq = (vq); \ 2764 \ 2765 switch (__VIRTQUEUE_CALL_vq->layout) { \ 2766 case VQ_LAYOUT_SPLIT: \ 2767 split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \ 2768 break; \ 2769 case VQ_LAYOUT_PACKED: \ 2770 packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \ 2771 break; \ 2772 case VQ_LAYOUT_SPLIT_IN_ORDER: \ 2773 split_in_order_ops.op(vq, ##__VA_ARGS__); \ 2774 break; \ 2775 case VQ_LAYOUT_PACKED_IN_ORDER: \ 2776 packed_in_order_ops.op(vq, ##__VA_ARGS__); \ 2777 break; \ 2778 default: \ 2779 BUG(); \ 2780 break; \ 2781 } \ 2782 }) 2783 2784 static inline int virtqueue_add(struct virtqueue *_vq, 2785 struct scatterlist *sgs[], 2786 unsigned int total_sg, 2787 unsigned int out_sgs, 2788 unsigned int in_sgs, 2789 void *data, 2790 void *ctx, 2791 bool premapped, 2792 gfp_t gfp, 2793 unsigned long attr) 2794 { 2795 struct vring_virtqueue *vq = to_vvq(_vq); 2796 2797 return VIRTQUEUE_CALL(vq, add, sgs, total_sg, 2798 out_sgs, in_sgs, data, 2799 ctx, premapped, gfp, attr); 2800 } 2801 2802 /** 2803 * virtqueue_add_sgs - expose buffers to other end 2804 * @_vq: the struct virtqueue we're talking about. 2805 * @sgs: array of terminated scatterlists. 2806 * @out_sgs: the number of scatterlists readable by other side 2807 * @in_sgs: the number of scatterlists which are writable (after readable ones) 2808 * @data: the token identifying the buffer. 2809 * @gfp: how to do memory allocations (if necessary). 2810 * 2811 * Caller must ensure we don't call this with other virtqueue operations 2812 * at the same time (except where noted). 2813 * 2814 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2815 * 2816 * NB: ENOSPC is a special code that is only returned on an attempt to add a 2817 * buffer to a full VQ. It indicates that some buffers are outstanding and that 2818 * the operation can be retried after some buffers have been used. 2819 */ 2820 int virtqueue_add_sgs(struct virtqueue *_vq, 2821 struct scatterlist *sgs[], 2822 unsigned int out_sgs, 2823 unsigned int in_sgs, 2824 void *data, 2825 gfp_t gfp) 2826 { 2827 unsigned int i, total_sg = 0; 2828 2829 /* Count them first. */ 2830 for (i = 0; i < out_sgs + in_sgs; i++) { 2831 struct scatterlist *sg; 2832 2833 for (sg = sgs[i]; sg; sg = sg_next(sg)) 2834 total_sg++; 2835 } 2836 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 2837 data, NULL, false, gfp, 0); 2838 } 2839 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 2840 2841 /** 2842 * virtqueue_add_outbuf - expose output buffers to other end 2843 * @vq: the struct virtqueue we're talking about. 2844 * @sg: scatterlist (must be well-formed and terminated!) 2845 * @num: the number of entries in @sg readable by other side 2846 * @data: the token identifying the buffer. 2847 * @gfp: how to do memory allocations (if necessary). 
2848 * 2849 * Caller must ensure we don't call this with other virtqueue operations 2850 * at the same time (except where noted). 2851 * 2852 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2853 */ 2854 int virtqueue_add_outbuf(struct virtqueue *vq, 2855 struct scatterlist *sg, unsigned int num, 2856 void *data, 2857 gfp_t gfp) 2858 { 2859 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp, 0); 2860 } 2861 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 2862 2863 /** 2864 * virtqueue_add_outbuf_premapped - expose output buffers to other end 2865 * @vq: the struct virtqueue we're talking about. 2866 * @sg: scatterlist (must be well-formed and terminated!) 2867 * @num: the number of entries in @sg readable by other side 2868 * @data: the token identifying the buffer. 2869 * @gfp: how to do memory allocations (if necessary). 2870 * 2871 * Caller must ensure we don't call this with other virtqueue operations 2872 * at the same time (except where noted). 2873 * 2874 * Return: 2875 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2876 */ 2877 int virtqueue_add_outbuf_premapped(struct virtqueue *vq, 2878 struct scatterlist *sg, unsigned int num, 2879 void *data, 2880 gfp_t gfp) 2881 { 2882 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp, 0); 2883 } 2884 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped); 2885 2886 /** 2887 * virtqueue_add_inbuf - expose input buffers to other end 2888 * @vq: the struct virtqueue we're talking about. 2889 * @sg: scatterlist (must be well-formed and terminated!) 2890 * @num: the number of entries in @sg writable by other side 2891 * @data: the token identifying the buffer. 2892 * @gfp: how to do memory allocations (if necessary). 2893 * 2894 * Caller must ensure we don't call this with other virtqueue operations 2895 * at the same time (except where noted). 2896 * 2897 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2898 */ 2899 int virtqueue_add_inbuf(struct virtqueue *vq, 2900 struct scatterlist *sg, unsigned int num, 2901 void *data, 2902 gfp_t gfp) 2903 { 2904 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 0); 2905 } 2906 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 2907 2908 /** 2909 * virtqueue_add_inbuf_cache_clean - expose input buffers with cache clean 2910 * @vq: the struct virtqueue we're talking about. 2911 * @sg: scatterlist (must be well-formed and terminated!) 2912 * @num: the number of entries in @sg writable by other side 2913 * @data: the token identifying the buffer. 2914 * @gfp: how to do memory allocations (if necessary). 2915 * 2916 * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate 2917 * that the CPU will not dirty any cacheline overlapping this buffer while it 2918 * is available, and to suppress overlapping cacheline warnings in DMA debug 2919 * builds. 2920 * 2921 * Caller must ensure we don't call this with other virtqueue operations 2922 * at the same time (except where noted). 2923 * 2924 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2925 */ 2926 int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq, 2927 struct scatterlist *sg, unsigned int num, 2928 void *data, 2929 gfp_t gfp) 2930 { 2931 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 2932 DMA_ATTR_CPU_CACHE_CLEAN); 2933 } 2934 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean); 2935 2936 /** 2937 * virtqueue_add_inbuf_ctx - expose input buffers to other end 2938 * @vq: the struct virtqueue we're talking about. 
2939  * @sg: scatterlist (must be well-formed and terminated!)
2940  * @num: the number of entries in @sg writable by other side
2941  * @data: the token identifying the buffer.
2942  * @ctx: extra context for the token
2943  * @gfp: how to do memory allocations (if necessary).
2944  *
2945  * Caller must ensure we don't call this with other virtqueue operations
2946  * at the same time (except where noted).
2947  *
2948  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2949  */
2950 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2951 			struct scatterlist *sg, unsigned int num,
2952 			void *data,
2953 			void *ctx,
2954 			gfp_t gfp)
2955 {
2956 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp, 0);
2957 }
2958 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2959 
2960 /**
2961  * virtqueue_add_inbuf_premapped - expose input buffers to other end
2962  * @vq: the struct virtqueue we're talking about.
2963  * @sg: scatterlist (must be well-formed and terminated!)
2964  * @num: the number of entries in @sg writable by other side
2965  * @data: the token identifying the buffer.
2966  * @ctx: extra context for the token
2967  * @gfp: how to do memory allocations (if necessary).
2968  *
2969  * Caller must ensure we don't call this with other virtqueue operations
2970  * at the same time (except where noted).
2971  *
2972  * Return:
2973  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2974  */
2975 int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
2976 				  struct scatterlist *sg, unsigned int num,
2977 				  void *data,
2978 				  void *ctx,
2979 				  gfp_t gfp)
2980 {
2981 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp, 0);
2982 }
2983 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
2984 
2985 /**
2986  * virtqueue_dma_dev - get the dma dev
2987  * @_vq: the struct virtqueue we're talking about.
2988  *
2989  * Returns the DMA device. It can be used with the DMA API.
2990  */
2991 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2992 {
2993 	struct vring_virtqueue *vq = to_vvq(_vq);
2994 
2995 	if (vq->use_map_api && !_vq->vdev->map)
2996 		return vq->map.dma_dev;
2997 	else
2998 		return NULL;
2999 }
3000 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
3001 
3002 /**
3003  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
3004  * @_vq: the struct virtqueue
3005  *
3006  * Instead of virtqueue_kick(), you can do:
3007  *	if (virtqueue_kick_prepare(vq))
3008  *		virtqueue_notify(vq);
3009  *
3010  * This is sometimes useful because the virtqueue_kick_prepare() needs
3011  * to be serialized, but the actual virtqueue_notify() call does not.
3012  */
3013 bool virtqueue_kick_prepare(struct virtqueue *_vq)
3014 {
3015 	struct vring_virtqueue *vq = to_vvq(_vq);
3016 
3017 	return VIRTQUEUE_CALL(vq, kick_prepare);
3018 }
3019 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
3020 
3021 /**
3022  * virtqueue_notify - second half of split virtqueue_kick call.
3023  * @_vq: the struct virtqueue
3024  *
3025  * This does not need to be serialized.
3026  *
3027  * Returns false if host notify failed or queue is broken, otherwise true.
3028  */
3029 bool virtqueue_notify(struct virtqueue *_vq)
3030 {
3031 	struct vring_virtqueue *vq = to_vvq(_vq);
3032 
3033 	if (unlikely(vq->broken))
3034 		return false;
3035 
3036 	/* Prod other side to tell it about changes.
*/ 3037 if (!vq->notify(_vq)) { 3038 vq->broken = true; 3039 return false; 3040 } 3041 return true; 3042 } 3043 EXPORT_SYMBOL_GPL(virtqueue_notify); 3044 3045 /** 3046 * virtqueue_kick - update after add_buf 3047 * @vq: the struct virtqueue 3048 * 3049 * After one or more virtqueue_add_* calls, invoke this to kick 3050 * the other side. 3051 * 3052 * Caller must ensure we don't call this with other virtqueue 3053 * operations at the same time (except where noted). 3054 * 3055 * Returns false if kick failed, otherwise true. 3056 */ 3057 bool virtqueue_kick(struct virtqueue *vq) 3058 { 3059 if (virtqueue_kick_prepare(vq)) 3060 return virtqueue_notify(vq); 3061 return true; 3062 } 3063 EXPORT_SYMBOL_GPL(virtqueue_kick); 3064 3065 /** 3066 * virtqueue_get_buf_ctx - get the next used buffer 3067 * @_vq: the struct virtqueue we're talking about. 3068 * @len: the length written into the buffer 3069 * @ctx: extra context for the token 3070 * 3071 * If the device wrote data into the buffer, @len will be set to the 3072 * amount written. This means you don't need to clear the buffer 3073 * beforehand to ensure there's no data leakage in the case of short 3074 * writes. 3075 * 3076 * Caller must ensure we don't call this with other virtqueue 3077 * operations at the same time (except where noted). 3078 * 3079 * Returns NULL if there are no used buffers, or the "data" token 3080 * handed to virtqueue_add_*(). 3081 */ 3082 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 3083 void **ctx) 3084 { 3085 struct vring_virtqueue *vq = to_vvq(_vq); 3086 3087 return VIRTQUEUE_CALL(vq, get, len, ctx); 3088 } 3089 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 3090 3091 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 3092 { 3093 return virtqueue_get_buf_ctx(_vq, len, NULL); 3094 } 3095 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 3096 /** 3097 * virtqueue_disable_cb - disable callbacks 3098 * @_vq: the struct virtqueue we're talking about. 3099 * 3100 * Note that this is not necessarily synchronous, hence unreliable and only 3101 * useful as an optimization. 3102 * 3103 * Unlike other operations, this need not be serialized. 3104 */ 3105 void virtqueue_disable_cb(struct virtqueue *_vq) 3106 { 3107 struct vring_virtqueue *vq = to_vvq(_vq); 3108 3109 VOID_VIRTQUEUE_CALL(vq, disable_cb); 3110 } 3111 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 3112 3113 /** 3114 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 3115 * @_vq: the struct virtqueue we're talking about. 3116 * 3117 * This re-enables callbacks; it returns current queue state 3118 * in an opaque unsigned value. This value should be later tested by 3119 * virtqueue_poll, to detect a possible race between the driver checking for 3120 * more work, and enabling callbacks. 3121 * 3122 * Caller must ensure we don't call this with other virtqueue 3123 * operations at the same time (except where noted). 3124 */ 3125 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 3126 { 3127 struct vring_virtqueue *vq = to_vvq(_vq); 3128 3129 if (vq->event_triggered) 3130 vq->event_triggered = false; 3131 3132 return VIRTQUEUE_CALL(vq, enable_cb_prepare); 3133 } 3134 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 3135 3136 /** 3137 * virtqueue_poll - query pending used buffers 3138 * @_vq: the struct virtqueue we're talking about. 3139 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 3140 * 3141 * Returns "true" if there are pending used buffers in the queue. 
3142 * 3143 * This does not need to be serialized. 3144 */ 3145 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 3146 { 3147 struct vring_virtqueue *vq = to_vvq(_vq); 3148 3149 if (unlikely(vq->broken)) 3150 return false; 3151 3152 virtio_mb(vq->weak_barriers); 3153 3154 return VIRTQUEUE_CALL(vq, poll, last_used_idx); 3155 } 3156 EXPORT_SYMBOL_GPL(virtqueue_poll); 3157 3158 /** 3159 * virtqueue_enable_cb - restart callbacks after disable_cb. 3160 * @_vq: the struct virtqueue we're talking about. 3161 * 3162 * This re-enables callbacks; it returns "false" if there are pending 3163 * buffers in the queue, to detect a possible race between the driver 3164 * checking for more work, and enabling callbacks. 3165 * 3166 * Caller must ensure we don't call this with other virtqueue 3167 * operations at the same time (except where noted). 3168 */ 3169 bool virtqueue_enable_cb(struct virtqueue *_vq) 3170 { 3171 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 3172 3173 return !virtqueue_poll(_vq, last_used_idx); 3174 } 3175 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 3176 3177 /** 3178 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 3179 * @_vq: the struct virtqueue we're talking about. 3180 * 3181 * This re-enables callbacks but hints to the other side to delay 3182 * interrupts until most of the available buffers have been processed; 3183 * it returns "false" if there are many pending buffers in the queue, 3184 * to detect a possible race between the driver checking for more work, 3185 * and enabling callbacks. 3186 * 3187 * Caller must ensure we don't call this with other virtqueue 3188 * operations at the same time (except where noted). 3189 */ 3190 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 3191 { 3192 struct vring_virtqueue *vq = to_vvq(_vq); 3193 3194 if (vq->event_triggered) 3195 data_race(vq->event_triggered = false); 3196 3197 return VIRTQUEUE_CALL(vq, enable_cb_delayed); 3198 } 3199 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 3200 3201 /** 3202 * virtqueue_detach_unused_buf - detach first unused buffer 3203 * @_vq: the struct virtqueue we're talking about. 3204 * 3205 * Returns NULL or the "data" token handed to virtqueue_add_*(). 3206 * This is not valid on an active queue; it is useful for device 3207 * shutdown or the reset queue. 3208 */ 3209 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 3210 { 3211 struct vring_virtqueue *vq = to_vvq(_vq); 3212 3213 return VIRTQUEUE_CALL(vq, detach_unused_buf); 3214 } 3215 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 3216 3217 static inline bool more_used(const struct vring_virtqueue *vq) 3218 { 3219 return VIRTQUEUE_CALL(vq, more_used); 3220 } 3221 3222 /** 3223 * vring_interrupt - notify a virtqueue on an interrupt 3224 * @irq: the IRQ number (ignored) 3225 * @_vq: the struct virtqueue to notify 3226 * 3227 * Calls the callback function of @_vq to process the virtqueue 3228 * notification. 3229 */ 3230 irqreturn_t vring_interrupt(int irq, void *_vq) 3231 { 3232 struct vring_virtqueue *vq = to_vvq(_vq); 3233 3234 if (!more_used(vq)) { 3235 pr_debug("virtqueue interrupt with no work for %p\n", vq); 3236 return IRQ_NONE; 3237 } 3238 3239 if (unlikely(vq->broken)) { 3240 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 3241 dev_warn_once(&vq->vq.vdev->dev, 3242 "virtio vring IRQ raised before DRIVER_OK"); 3243 return IRQ_NONE; 3244 #else 3245 return IRQ_HANDLED; 3246 #endif 3247 } 3248 3249 /* Just a hint for performance: so it's ok that this can be racy! 
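 * event_triggered only lets virtqueue_disable_cb() skip a redundant write
 * to the event suppression area until callbacks are re-armed by the
 * virtqueue_enable_cb*() helpers, so a lost update here merely costs that
 * one extra write.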
*/ 3250 if (vq->event) 3251 data_race(vq->event_triggered = true); 3252 3253 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 3254 if (vq->vq.callback) 3255 vq->vq.callback(&vq->vq); 3256 3257 return IRQ_HANDLED; 3258 } 3259 EXPORT_SYMBOL_GPL(vring_interrupt); 3260 3261 struct virtqueue *vring_create_virtqueue( 3262 unsigned int index, 3263 unsigned int num, 3264 unsigned int vring_align, 3265 struct virtio_device *vdev, 3266 bool weak_barriers, 3267 bool may_reduce_num, 3268 bool context, 3269 bool (*notify)(struct virtqueue *), 3270 void (*callback)(struct virtqueue *), 3271 const char *name) 3272 { 3273 union virtio_map map = {.dma_dev = vdev->dev.parent}; 3274 3275 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 3276 return vring_create_virtqueue_packed(index, num, vring_align, 3277 vdev, weak_barriers, may_reduce_num, 3278 context, notify, callback, name, map); 3279 3280 return vring_create_virtqueue_split(index, num, vring_align, 3281 vdev, weak_barriers, may_reduce_num, 3282 context, notify, callback, name, map); 3283 } 3284 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 3285 3286 struct virtqueue *vring_create_virtqueue_map( 3287 unsigned int index, 3288 unsigned int num, 3289 unsigned int vring_align, 3290 struct virtio_device *vdev, 3291 bool weak_barriers, 3292 bool may_reduce_num, 3293 bool context, 3294 bool (*notify)(struct virtqueue *), 3295 void (*callback)(struct virtqueue *), 3296 const char *name, 3297 union virtio_map map) 3298 { 3299 3300 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 3301 return vring_create_virtqueue_packed(index, num, vring_align, 3302 vdev, weak_barriers, may_reduce_num, 3303 context, notify, callback, name, map); 3304 3305 return vring_create_virtqueue_split(index, num, vring_align, 3306 vdev, weak_barriers, may_reduce_num, 3307 context, notify, callback, name, map); 3308 } 3309 EXPORT_SYMBOL_GPL(vring_create_virtqueue_map); 3310 3311 /** 3312 * virtqueue_resize - resize the vring of vq 3313 * @_vq: the struct virtqueue we're talking about. 3314 * @num: new ring num 3315 * @recycle: callback to recycle unused buffers 3316 * @recycle_done: callback to be invoked when recycle for all unused buffers done 3317 * 3318 * When it is really necessary to create a new vring, it will set the current vq 3319 * into the reset state. Then call the passed callback to recycle the buffer 3320 * that is no longer used. Only after the new vring is successfully created, the 3321 * old vring will be released. 3322 * 3323 * Caller must ensure we don't call this with other virtqueue operations 3324 * at the same time (except where noted). 3325 * 3326 * Returns zero or a negative error. 3327 * 0: success. 3328 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. 
3329 * vq can still work normally 3330 * -EBUSY: Failed to sync with device, vq may not work properly 3331 * -ENOENT: Transport or device not supported 3332 * -E2BIG/-EINVAL: num error 3333 * -EPERM: Operation not permitted 3334 * 3335 */ 3336 int virtqueue_resize(struct virtqueue *_vq, u32 num, 3337 void (*recycle)(struct virtqueue *vq, void *buf), 3338 void (*recycle_done)(struct virtqueue *vq)) 3339 { 3340 struct vring_virtqueue *vq = to_vvq(_vq); 3341 int err, err_reset; 3342 3343 if (num > vq->vq.num_max) 3344 return -E2BIG; 3345 3346 if (!num) 3347 return -EINVAL; 3348 3349 if (virtqueue_get_vring_size(_vq) == num) 3350 return 0; 3351 3352 err = virtqueue_disable_and_recycle(_vq, recycle); 3353 if (err) 3354 return err; 3355 if (recycle_done) 3356 recycle_done(_vq); 3357 3358 err = VIRTQUEUE_CALL(vq, resize, num); 3359 3360 err_reset = virtqueue_enable_after_reset(_vq); 3361 if (err_reset) 3362 return err_reset; 3363 3364 return err; 3365 } 3366 EXPORT_SYMBOL_GPL(virtqueue_resize); 3367 3368 /** 3369 * virtqueue_reset - detach and recycle all unused buffers 3370 * @_vq: the struct virtqueue we're talking about. 3371 * @recycle: callback to recycle unused buffers 3372 * @recycle_done: callback to be invoked when recycle for all unused buffers done 3373 * 3374 * Caller must ensure we don't call this with other virtqueue operations 3375 * at the same time (except where noted). 3376 * 3377 * Returns zero or a negative error. 3378 * 0: success. 3379 * -EBUSY: Failed to sync with device, vq may not work properly 3380 * -ENOENT: Transport or device not supported 3381 * -EPERM: Operation not permitted 3382 */ 3383 int virtqueue_reset(struct virtqueue *_vq, 3384 void (*recycle)(struct virtqueue *vq, void *buf), 3385 void (*recycle_done)(struct virtqueue *vq)) 3386 { 3387 struct vring_virtqueue *vq = to_vvq(_vq); 3388 int err; 3389 3390 err = virtqueue_disable_and_recycle(_vq, recycle); 3391 if (err) 3392 return err; 3393 if (recycle_done) 3394 recycle_done(_vq); 3395 3396 VOID_VIRTQUEUE_CALL(vq, reset); 3397 3398 return virtqueue_enable_after_reset(_vq); 3399 } 3400 EXPORT_SYMBOL_GPL(virtqueue_reset); 3401 3402 struct virtqueue *vring_new_virtqueue(unsigned int index, 3403 unsigned int num, 3404 unsigned int vring_align, 3405 struct virtio_device *vdev, 3406 bool weak_barriers, 3407 bool context, 3408 void *pages, 3409 bool (*notify)(struct virtqueue *vq), 3410 void (*callback)(struct virtqueue *vq), 3411 const char *name) 3412 { 3413 struct vring_virtqueue_split vring_split = {}; 3414 union virtio_map map = {.dma_dev = vdev->dev.parent}; 3415 3416 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 3417 struct vring_virtqueue_packed vring_packed = {}; 3418 3419 vring_packed.vring.num = num; 3420 vring_packed.vring.desc = pages; 3421 return __vring_new_virtqueue_packed(index, &vring_packed, 3422 vdev, weak_barriers, 3423 context, notify, callback, 3424 name, map); 3425 } 3426 3427 vring_init(&vring_split.vring, num, pages, vring_align); 3428 return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, 3429 context, notify, callback, name, 3430 map); 3431 } 3432 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 3433 3434 static void vring_free(struct virtqueue *_vq) 3435 { 3436 struct vring_virtqueue *vq = to_vvq(_vq); 3437 3438 if (vq->we_own_ring) { 3439 if (virtqueue_is_packed(vq)) { 3440 vring_free_queue(vq->vq.vdev, 3441 vq->packed.ring_size_in_bytes, 3442 vq->packed.vring.desc, 3443 vq->packed.ring_dma_addr, 3444 vq->map); 3445 3446 vring_free_queue(vq->vq.vdev, 3447 
vq->packed.event_size_in_bytes, 3448 vq->packed.vring.driver, 3449 vq->packed.driver_event_dma_addr, 3450 vq->map); 3451 3452 vring_free_queue(vq->vq.vdev, 3453 vq->packed.event_size_in_bytes, 3454 vq->packed.vring.device, 3455 vq->packed.device_event_dma_addr, 3456 vq->map); 3457 3458 kfree(vq->packed.desc_state); 3459 kfree(vq->packed.desc_extra); 3460 } else { 3461 vring_free_queue(vq->vq.vdev, 3462 vq->split.queue_size_in_bytes, 3463 vq->split.vring.desc, 3464 vq->split.queue_dma_addr, 3465 vq->map); 3466 } 3467 } 3468 if (!virtqueue_is_packed(vq)) { 3469 kfree(vq->split.desc_state); 3470 kfree(vq->split.desc_extra); 3471 } 3472 } 3473 3474 void vring_del_virtqueue(struct virtqueue *_vq) 3475 { 3476 struct vring_virtqueue *vq = to_vvq(_vq); 3477 3478 spin_lock(&vq->vq.vdev->vqs_list_lock); 3479 list_del(&_vq->list); 3480 spin_unlock(&vq->vq.vdev->vqs_list_lock); 3481 3482 vring_free(_vq); 3483 3484 kfree(vq); 3485 } 3486 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 3487 3488 u32 vring_notification_data(struct virtqueue *_vq) 3489 { 3490 struct vring_virtqueue *vq = to_vvq(_vq); 3491 u16 next; 3492 3493 if (virtqueue_is_packed(vq)) 3494 next = (vq->packed.next_avail_idx & 3495 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | 3496 vq->packed.avail_wrap_counter << 3497 VRING_PACKED_EVENT_F_WRAP_CTR; 3498 else 3499 next = vq->split.avail_idx_shadow; 3500 3501 return next << 16 | _vq->index; 3502 } 3503 EXPORT_SYMBOL_GPL(vring_notification_data); 3504 3505 /* Manipulates transport-specific feature bits. */ 3506 void vring_transport_features(struct virtio_device *vdev) 3507 { 3508 unsigned int i; 3509 3510 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 3511 switch (i) { 3512 case VIRTIO_RING_F_INDIRECT_DESC: 3513 break; 3514 case VIRTIO_RING_F_EVENT_IDX: 3515 break; 3516 case VIRTIO_F_VERSION_1: 3517 break; 3518 case VIRTIO_F_ACCESS_PLATFORM: 3519 break; 3520 case VIRTIO_F_RING_PACKED: 3521 break; 3522 case VIRTIO_F_ORDER_PLATFORM: 3523 break; 3524 case VIRTIO_F_NOTIFICATION_DATA: 3525 break; 3526 case VIRTIO_F_IN_ORDER: 3527 break; 3528 default: 3529 /* We don't understand this bit. */ 3530 __virtio_clear_bit(vdev, i); 3531 } 3532 } 3533 } 3534 EXPORT_SYMBOL_GPL(vring_transport_features); 3535 3536 /** 3537 * virtqueue_get_vring_size - return the size of the virtqueue's vring 3538 * @_vq: the struct virtqueue containing the vring of interest. 3539 * 3540 * Returns the size of the vring. This is mainly used for boasting to 3541 * userspace. Unlike other operations, this need not be serialized. 3542 */ 3543 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) 3544 { 3545 3546 const struct vring_virtqueue *vq = to_vvq(_vq); 3547 3548 return virtqueue_is_packed(vq) ? vq->packed.vring.num : 3549 vq->split.vring.num; 3550 } 3551 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 3552 3553 /* 3554 * This function should only be called by the core, not directly by the driver. 3555 */ 3556 void __virtqueue_break(struct virtqueue *_vq) 3557 { 3558 struct vring_virtqueue *vq = to_vvq(_vq); 3559 3560 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3561 WRITE_ONCE(vq->broken, true); 3562 } 3563 EXPORT_SYMBOL_GPL(__virtqueue_break); 3564 3565 /* 3566 * This function should only be called by the core, not directly by the driver. 3567 */ 3568 void __virtqueue_unbreak(struct virtqueue *_vq) 3569 { 3570 struct vring_virtqueue *vq = to_vvq(_vq); 3571 3572 /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ 3573 WRITE_ONCE(vq->broken, false); 3574 } 3575 EXPORT_SYMBOL_GPL(__virtqueue_unbreak); 3576 3577 bool virtqueue_is_broken(const struct virtqueue *_vq) 3578 { 3579 const struct vring_virtqueue *vq = to_vvq(_vq); 3580 3581 return READ_ONCE(vq->broken); 3582 } 3583 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 3584 3585 /* 3586 * This should prevent the device from being used, allowing drivers to 3587 * recover. You may need to grab appropriate locks to flush. 3588 */ 3589 void virtio_break_device(struct virtio_device *dev) 3590 { 3591 struct virtqueue *_vq; 3592 3593 spin_lock(&dev->vqs_list_lock); 3594 list_for_each_entry(_vq, &dev->vqs, list) { 3595 struct vring_virtqueue *vq = to_vvq(_vq); 3596 3597 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3598 WRITE_ONCE(vq->broken, true); 3599 } 3600 spin_unlock(&dev->vqs_list_lock); 3601 } 3602 EXPORT_SYMBOL_GPL(virtio_break_device); 3603 3604 /* 3605 * This should allow the device to be used by the driver. You may 3606 * need to grab appropriate locks to flush the write to 3607 * vq->broken. This should only be used in some specific case e.g 3608 * (probing and restoring). This function should only be called by the 3609 * core, not directly by the driver. 3610 */ 3611 void __virtio_unbreak_device(struct virtio_device *dev) 3612 { 3613 struct virtqueue *_vq; 3614 3615 spin_lock(&dev->vqs_list_lock); 3616 list_for_each_entry(_vq, &dev->vqs, list) { 3617 struct vring_virtqueue *vq = to_vvq(_vq); 3618 3619 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3620 WRITE_ONCE(vq->broken, false); 3621 } 3622 spin_unlock(&dev->vqs_list_lock); 3623 } 3624 EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 3625 3626 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) 3627 { 3628 const struct vring_virtqueue *vq = to_vvq(_vq); 3629 3630 BUG_ON(!vq->we_own_ring); 3631 3632 if (virtqueue_is_packed(vq)) 3633 return vq->packed.ring_dma_addr; 3634 3635 return vq->split.queue_dma_addr; 3636 } 3637 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 3638 3639 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) 3640 { 3641 const struct vring_virtqueue *vq = to_vvq(_vq); 3642 3643 BUG_ON(!vq->we_own_ring); 3644 3645 if (virtqueue_is_packed(vq)) 3646 return vq->packed.driver_event_dma_addr; 3647 3648 return vq->split.queue_dma_addr + 3649 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 3650 } 3651 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 3652 3653 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) 3654 { 3655 const struct vring_virtqueue *vq = to_vvq(_vq); 3656 3657 BUG_ON(!vq->we_own_ring); 3658 3659 if (virtqueue_is_packed(vq)) 3660 return vq->packed.device_event_dma_addr; 3661 3662 return vq->split.queue_dma_addr + 3663 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 3664 } 3665 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 3666 3667 /* Only available for split ring */ 3668 const struct vring *virtqueue_get_vring(const struct virtqueue *vq) 3669 { 3670 return &to_vvq(vq)->split.vring; 3671 } 3672 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 3673 3674 /** 3675 * virtqueue_map_alloc_coherent - alloc coherent mapping 3676 * @vdev: the virtio device we are talking to 3677 * @map: metadata for performing mapping 3678 * @size: the size of the buffer 3679 * @map_handle: the pointer to the mapped address 3680 * @gfp: allocation flag (GFP_XXX) 3681 * 3682 * return virtual address or NULL on error 3683 */ 3684 void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, 3685 union virtio_map map, 3686 size_t 
size, dma_addr_t *map_handle,
3687 				   gfp_t gfp)
3688 {
3689 	if (vdev->map)
3690 		return vdev->map->alloc(map, size,
3691 					map_handle, gfp);
3692 	else
3693 		return dma_alloc_coherent(map.dma_dev, size,
3694 					  map_handle, gfp);
3695 }
3696 EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent);
3697 
3698 /**
3699  * virtqueue_map_free_coherent - free coherent mapping
3700  * @vdev: the virtio device we are talking to
3701  * @map: metadata for performing mapping
3702  * @size: the size of the buffer
3703  * @vaddr: the virtual address that needs to be freed
3704  * @map_handle: the mapped address that needs to be freed
3705  *
3706  */
3707 void virtqueue_map_free_coherent(struct virtio_device *vdev,
3708 				 union virtio_map map, size_t size, void *vaddr,
3709 				 dma_addr_t map_handle)
3710 {
3711 	if (vdev->map)
3712 		vdev->map->free(map, size, vaddr,
3713 				map_handle, 0);
3714 	else
3715 		dma_free_coherent(map.dma_dev, size, vaddr, map_handle);
3716 }
3717 EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent);
3718 
3719 /**
3720  * virtqueue_map_page_attrs - map a page to the device
3721  * @_vq: the virtqueue we are talking to
3722  * @page: the page that will be mapped by the device
3723  * @offset: the offset in the page for a buffer
3724  * @size: the buffer size
3725  * @dir: mapping direction
3726  * @attrs: mapping attributes
3727  *
3728  * Returns the mapped address. The caller should check it with virtqueue_map_mapping_error().
3729  */
3730 dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq,
3731 				    struct page *page,
3732 				    unsigned long offset,
3733 				    size_t size,
3734 				    enum dma_data_direction dir,
3735 				    unsigned long attrs)
3736 {
3737 	const struct vring_virtqueue *vq = to_vvq(_vq);
3738 	struct virtio_device *vdev = _vq->vdev;
3739 
3740 	if (vdev->map)
3741 		return vdev->map->map_page(vq->map,
3742 					   page, offset, size,
3743 					   dir, attrs);
3744 
3745 	return dma_map_page_attrs(vring_dma_dev(vq),
3746 				  page, offset, size,
3747 				  dir, attrs);
3748 }
3749 EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs);
3750 
3751 /**
3752  * virtqueue_unmap_page_attrs - unmap a page mapped for the device
3753  * @_vq: the virtqueue we are talking to
3754  * @map_handle: the mapped address
3755  * @size: the buffer size
3756  * @dir: mapping direction
3757  * @attrs: unmapping attributes
3758  */
3759 void virtqueue_unmap_page_attrs(const struct virtqueue *_vq,
3760 				dma_addr_t map_handle,
3761 				size_t size, enum dma_data_direction dir,
3762 				unsigned long attrs)
3763 {
3764 	const struct vring_virtqueue *vq = to_vvq(_vq);
3765 	struct virtio_device *vdev = _vq->vdev;
3766 
3767 	if (vdev->map)
3768 		vdev->map->unmap_page(vq->map,
3769 				      map_handle, size, dir, attrs);
3770 	else
3771 		dma_unmap_page_attrs(vring_dma_dev(vq), map_handle,
3772 				     size, dir, attrs);
3773 }
3774 EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs);
3775 
3776 /**
3777  * virtqueue_map_single_attrs - map DMA for _vq
3778  * @_vq: the struct virtqueue we're talking about.
3779  * @ptr: the pointer to the buffer to map
3780  * @size: the size of the buffer to map
3781  * @dir: DMA direction
3782  * @attrs: DMA Attrs
3783  *
3784  * The caller uses this to do the DMA mapping in advance; the resulting DMA
3785  * address can then be passed to this _vq when it is in premapped mode.
3786  *
3787  * Returns the mapped address. The caller should check it with virtqueue_map_mapping_error().
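 *
 * A minimal usage sketch (hypothetical driver code; @buf, @len and the
 * error handling are illustrative only):
 *
 *	dma_addr_t addr;
 *
 *	addr = virtqueue_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
 *	if (virtqueue_map_mapping_error(vq, addr))
 *		return -ENOMEM;
 *
 *	(hand the premapped address to the device, e.g. through the
 *	 virtqueue_add_*_premapped() helpers)
 *
 *	virtqueue_unmap_single_attrs(vq, addr, len, DMA_TO_DEVICE, 0);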
3788  */
3789 dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr,
3790 				      size_t size,
3791 				      enum dma_data_direction dir,
3792 				      unsigned long attrs)
3793 {
3794 	const struct vring_virtqueue *vq = to_vvq(_vq);
3795 
3796 	if (!vq->use_map_api) {
3797 		kmsan_handle_dma(virt_to_phys(ptr), size, dir);
3798 		return (dma_addr_t)virt_to_phys(ptr);
3799 	}
3800 
3801 	/* DMA must never operate on areas that might be remapped. */
3802 	if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr),
3803 			  "rejecting DMA map of vmalloc memory\n"))
3804 		return DMA_MAPPING_ERROR;
3805 
3806 	return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr),
3807 					offset_in_page(ptr), size, dir, attrs);
3808 }
3809 EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs);
3810 
3811 /**
3812  * virtqueue_unmap_single_attrs - unmap a mapping for _vq
3813  * @_vq: the struct virtqueue we're talking about.
3814  * @addr: the dma address to unmap
3815  * @size: the size of the buffer
3816  * @dir: DMA direction
3817  * @attrs: DMA Attrs
3818  *
3819  * Unmap an address that was mapped by the virtqueue_map_* APIs.
3820  *
3821  */
3822 void virtqueue_unmap_single_attrs(const struct virtqueue *_vq,
3823 				  dma_addr_t addr,
3824 				  size_t size, enum dma_data_direction dir,
3825 				  unsigned long attrs)
3826 {
3827 	const struct vring_virtqueue *vq = to_vvq(_vq);
3828 
3829 	if (!vq->use_map_api)
3830 		return;
3831 
3832 	virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs);
3833 }
3834 EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs);
3835 
3836 /**
3837  * virtqueue_map_mapping_error - check dma address
3838  * @_vq: the struct virtqueue we're talking about.
3839  * @addr: DMA address
3840  *
3841  * Returns 0 if the DMA address is valid; a non-zero value means it is not.
3842  */
3843 int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr)
3844 {
3845 	const struct vring_virtqueue *vq = to_vvq(_vq);
3846 
3847 	return vring_mapping_error(vq, addr);
3848 }
3849 EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error);
3850 
3851 /**
3852  * virtqueue_map_need_sync - check whether a dma address needs sync
3853  * @_vq: the struct virtqueue we're talking about.
3854  * @addr: DMA address
3855  *
3856  * Check if the dma address mapped by the virtqueue_map_* APIs needs to be
3857  * synchronized.
3858  *
3859  * Returns true if synchronization is needed, false otherwise.
3860  */
3861 bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr)
3862 {
3863 	const struct vring_virtqueue *vq = to_vvq(_vq);
3864 	struct virtio_device *vdev = _vq->vdev;
3865 
3866 	if (!vq->use_map_api)
3867 		return false;
3868 
3869 	if (vdev->map)
3870 		return vdev->map->need_sync(vq->map, addr);
3871 	else
3872 		return dma_need_sync(vring_dma_dev(vq), addr);
3873 }
3874 EXPORT_SYMBOL_GPL(virtqueue_map_need_sync);
3875 
3876 /**
3877  * virtqueue_map_sync_single_range_for_cpu - map sync for cpu
3878  * @_vq: the struct virtqueue we're talking about.
3879 * @addr: DMA address 3880 * @offset: DMA address offset 3881 * @size: buf size for sync 3882 * @dir: DMA direction 3883 * 3884 * Before calling this function, use virtqueue_map_need_sync() to confirm that 3885 * the DMA address really needs to be synchronized 3886 * 3887 */ 3888 void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, 3889 dma_addr_t addr, 3890 unsigned long offset, size_t size, 3891 enum dma_data_direction dir) 3892 { 3893 const struct vring_virtqueue *vq = to_vvq(_vq); 3894 struct virtio_device *vdev = _vq->vdev; 3895 3896 if (!vq->use_map_api) 3897 return; 3898 3899 if (vdev->map) 3900 vdev->map->sync_single_for_cpu(vq->map, 3901 addr + offset, size, dir); 3902 else 3903 dma_sync_single_range_for_cpu(vring_dma_dev(vq), 3904 addr, offset, size, dir); 3905 } 3906 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); 3907 3908 /** 3909 * virtqueue_map_sync_single_range_for_device - map sync for device 3910 * @_vq: the struct virtqueue we're talking about. 3911 * @addr: DMA address 3912 * @offset: DMA address offset 3913 * @size: buf size for sync 3914 * @dir: DMA direction 3915 * 3916 * Before calling this function, use virtqueue_map_need_sync() to confirm that 3917 * the DMA address really needs to be synchronized 3918 */ 3919 void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, 3920 dma_addr_t addr, 3921 unsigned long offset, size_t size, 3922 enum dma_data_direction dir) 3923 { 3924 const struct vring_virtqueue *vq = to_vvq(_vq); 3925 struct virtio_device *vdev = _vq->vdev; 3926 3927 if (!vq->use_map_api) 3928 return; 3929 3930 if (vdev->map) 3931 vdev->map->sync_single_for_device(vq->map, 3932 addr + offset, 3933 size, dir); 3934 else 3935 dma_sync_single_range_for_device(vring_dma_dev(vq), addr, 3936 offset, size, dir); 3937 } 3938 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); 3939 3940 MODULE_DESCRIPTION("Virtio ring implementation"); 3941 MODULE_LICENSE("GPL"); 3942
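
/*
 * A minimal usage sketch of the add/kick/get cycle documented above, built
 * only from functions exported by this file. The VRING_USAGE_EXAMPLE guard
 * and my_send() are hypothetical names, error handling is reduced to the
 * essentials, and the block is compiled out by default.
 */
#ifdef VRING_USAGE_EXAMPLE
static int my_send(struct virtqueue *vq, void *buf, unsigned int len)
{
	struct scatterlist sg;
	unsigned int used_len;
	void *token;
	int err;

	/* Expose one device-readable buffer; @buf doubles as the token. */
	sg_init_one(&sg, buf, len);
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err)
		return err;	/* -ENOSPC: retry after some buffers are used */

	/* Split kick: prepare under the caller's serialization, notify outside it. */
	if (virtqueue_kick_prepare(vq))
		virtqueue_notify(vq);

	/* Busy-poll for completion; a real driver would use the callback. */
	while (!(token = virtqueue_get_buf(vq, &used_len)))
		cpu_relax();

	return token == buf ? 0 : -EIO;
}
#endif /* VRING_USAGE_EXAMPLE */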