1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/kmsan.h> 15 #include <linux/spinlock.h> 16 #include <xen/xen.h> 17 18 #ifdef DEBUG 19 /* For development, we want to crash whenever the ring is screwed. */ 20 #define BAD_RING(_vq, fmt, args...) \ 21 do { \ 22 dev_err(&(_vq)->vq.vdev->dev, \ 23 "%s:"fmt, (_vq)->vq.name, ##args); \ 24 BUG(); \ 25 } while (0) 26 /* Caller is supposed to guarantee no reentry. */ 27 #define START_USE(_vq) \ 28 do { \ 29 if ((_vq)->in_use) \ 30 panic("%s:in_use = %i\n", \ 31 (_vq)->vq.name, (_vq)->in_use); \ 32 (_vq)->in_use = __LINE__; \ 33 } while (0) 34 #define END_USE(_vq) \ 35 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 36 #define LAST_ADD_TIME_UPDATE(_vq) \ 37 do { \ 38 ktime_t now = ktime_get(); \ 39 \ 40 /* No kick or get, with .1 second between? Warn. */ \ 41 if ((_vq)->last_add_time_valid) \ 42 WARN_ON(ktime_to_ms(ktime_sub(now, \ 43 (_vq)->last_add_time)) > 100); \ 44 (_vq)->last_add_time = now; \ 45 (_vq)->last_add_time_valid = true; \ 46 } while (0) 47 #define LAST_ADD_TIME_CHECK(_vq) \ 48 do { \ 49 if ((_vq)->last_add_time_valid) { \ 50 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 51 (_vq)->last_add_time)) > 100); \ 52 } \ 53 } while (0) 54 #define LAST_ADD_TIME_INVALID(_vq) \ 55 ((_vq)->last_add_time_valid = false) 56 #else 57 #define BAD_RING(_vq, fmt, args...) \ 58 do { \ 59 dev_err(&_vq->vq.vdev->dev, \ 60 "%s:"fmt, (_vq)->vq.name, ##args); \ 61 (_vq)->broken = true; \ 62 } while (0) 63 #define START_USE(vq) 64 #define END_USE(vq) 65 #define LAST_ADD_TIME_UPDATE(vq) 66 #define LAST_ADD_TIME_CHECK(vq) 67 #define LAST_ADD_TIME_INVALID(vq) 68 #endif 69 70 struct vring_desc_state_split { 71 void *data; /* Data for callback. */ 72 73 /* Indirect desc table and extra table, if any. These two will be 74 * allocated together. So we won't stress more to the memory allocator. 75 */ 76 struct vring_desc *indir_desc; 77 }; 78 79 struct vring_desc_state_packed { 80 void *data; /* Data for callback. */ 81 82 /* Indirect desc table and extra table, if any. These two will be 83 * allocated together. So we won't stress more to the memory allocator. 84 */ 85 struct vring_packed_desc *indir_desc; 86 u16 num; /* Descriptor list length. */ 87 u16 last; /* The last desc state in a list. */ 88 }; 89 90 struct vring_desc_extra { 91 dma_addr_t addr; /* Descriptor DMA addr. */ 92 u32 len; /* Descriptor length. */ 93 u16 flags; /* Descriptor flags. */ 94 u16 next; /* The next desc state in a list. */ 95 }; 96 97 struct vring_virtqueue_split { 98 /* Actual memory layout for this queue. */ 99 struct vring vring; 100 101 /* Last written value to avail->flags */ 102 u16 avail_flags_shadow; 103 104 /* 105 * Last written value to avail->idx in 106 * guest byte order. 107 */ 108 u16 avail_idx_shadow; 109 110 /* Per-descriptor state. */ 111 struct vring_desc_state_split *desc_state; 112 struct vring_desc_extra *desc_extra; 113 114 /* DMA address and size information */ 115 dma_addr_t queue_dma_addr; 116 size_t queue_size_in_bytes; 117 118 /* 119 * The parameters for creating vrings are reserved for creating new 120 * vring. 
121 */ 122 u32 vring_align; 123 bool may_reduce_num; 124 }; 125 126 struct vring_virtqueue_packed { 127 /* Actual memory layout for this queue. */ 128 struct { 129 unsigned int num; 130 struct vring_packed_desc *desc; 131 struct vring_packed_desc_event *driver; 132 struct vring_packed_desc_event *device; 133 } vring; 134 135 /* Driver ring wrap counter. */ 136 bool avail_wrap_counter; 137 138 /* Avail used flags. */ 139 u16 avail_used_flags; 140 141 /* Index of the next avail descriptor. */ 142 u16 next_avail_idx; 143 144 /* 145 * Last written value to driver->flags in 146 * guest byte order. 147 */ 148 u16 event_flags_shadow; 149 150 /* Per-descriptor state. */ 151 struct vring_desc_state_packed *desc_state; 152 struct vring_desc_extra *desc_extra; 153 154 /* DMA address and size information */ 155 dma_addr_t ring_dma_addr; 156 dma_addr_t driver_event_dma_addr; 157 dma_addr_t device_event_dma_addr; 158 size_t ring_size_in_bytes; 159 size_t event_size_in_bytes; 160 }; 161 162 struct vring_virtqueue { 163 struct virtqueue vq; 164 165 /* Is this a packed ring? */ 166 bool packed_ring; 167 168 /* Is DMA API used? */ 169 bool use_dma_api; 170 171 /* Can we use weak barriers? */ 172 bool weak_barriers; 173 174 /* Other side has made a mess, don't try any more. */ 175 bool broken; 176 177 /* Host supports indirect buffers */ 178 bool indirect; 179 180 /* Host publishes avail event idx */ 181 bool event; 182 183 /* Head of free buffer list. */ 184 unsigned int free_head; 185 /* Number we've added since last sync. */ 186 unsigned int num_added; 187 188 /* Last used index we've seen. 189 * for split ring, it just contains last used index 190 * for packed ring: 191 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. 192 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. 193 */ 194 u16 last_used_idx; 195 196 /* Hint for event idx: already triggered no need to disable. */ 197 bool event_triggered; 198 199 union { 200 /* Available for split ring */ 201 struct vring_virtqueue_split split; 202 203 /* Available for packed ring */ 204 struct vring_virtqueue_packed packed; 205 }; 206 207 /* How to notify other side. FIXME: commonalize hcalls! */ 208 bool (*notify)(struct virtqueue *vq); 209 210 /* DMA, allocation, and size information */ 211 bool we_own_ring; 212 213 /* Device used for doing DMA */ 214 struct device *dma_dev; 215 216 #ifdef DEBUG 217 /* They're supposed to lock for us. */ 218 unsigned int in_use; 219 220 /* Figure out if their kicks are too delayed. */ 221 bool last_add_time_valid; 222 ktime_t last_add_time; 223 #endif 224 }; 225 226 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 227 struct vring_virtqueue_split *vring_split, 228 struct virtio_device *vdev, 229 bool weak_barriers, 230 bool context, 231 bool (*notify)(struct virtqueue *), 232 void (*callback)(struct virtqueue *), 233 const char *name, 234 struct device *dma_dev); 235 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); 236 static void vring_free(struct virtqueue *_vq); 237 238 /* 239 * Helpers. 240 */ 241 242 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) 243 244 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, 245 unsigned int total_sg) 246 { 247 /* 248 * If the host supports indirect descriptor tables, and we have multiple 249 * buffers, then go indirect. 
FIXME: tune this threshold 250 */ 251 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 252 } 253 254 /* 255 * Modern virtio devices have feature bits to specify whether they need a 256 * quirk and bypass the IOMMU. If not there, just use the DMA API. 257 * 258 * If there, the interaction between virtio and DMA API is messy. 259 * 260 * On most systems with virtio, physical addresses match bus addresses, 261 * and it doesn't particularly matter whether we use the DMA API. 262 * 263 * On some systems, including Xen and any system with a physical device 264 * that speaks virtio behind a physical IOMMU, we must use the DMA API 265 * for virtio DMA to work at all. 266 * 267 * On other systems, including SPARC and PPC64, virtio-pci devices are 268 * enumerated as though they are behind an IOMMU, but the virtio host 269 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 270 * there or somehow map everything as the identity. 271 * 272 * For the time being, we preserve historic behavior and bypass the DMA 273 * API. 274 * 275 * TODO: install a per-device DMA ops structure that does the right thing 276 * taking into account all the above quirks, and use the DMA API 277 * unconditionally on data path. 278 */ 279 280 static bool vring_use_dma_api(const struct virtio_device *vdev) 281 { 282 if (!virtio_has_dma_quirk(vdev)) 283 return true; 284 285 /* Otherwise, we are left to guess. */ 286 /* 287 * In theory, it's possible to have a buggy QEMU-supposed 288 * emulated Q35 IOMMU and Xen enabled at the same time. On 289 * such a configuration, virtio has never worked and will 290 * not work without an even larger kludge. Instead, enable 291 * the DMA API if we're a Xen guest, which at least allows 292 * all of the sensible Xen configurations to work correctly. 293 */ 294 if (xen_domain()) 295 return true; 296 297 return false; 298 } 299 300 static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring, 301 const struct vring_desc_extra *extra) 302 { 303 return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR); 304 } 305 306 size_t virtio_max_dma_size(const struct virtio_device *vdev) 307 { 308 size_t max_segment_size = SIZE_MAX; 309 310 if (vring_use_dma_api(vdev)) 311 max_segment_size = dma_max_mapping_size(vdev->dev.parent); 312 313 return max_segment_size; 314 } 315 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 316 317 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 318 dma_addr_t *dma_handle, gfp_t flag, 319 struct device *dma_dev) 320 { 321 if (vring_use_dma_api(vdev)) { 322 return dma_alloc_coherent(dma_dev, size, 323 dma_handle, flag); 324 } else { 325 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 326 327 if (queue) { 328 phys_addr_t phys_addr = virt_to_phys(queue); 329 *dma_handle = (dma_addr_t)phys_addr; 330 331 /* 332 * Sanity check: make sure we dind't truncate 333 * the address. The only arches I can find that 334 * have 64-bit phys_addr_t but 32-bit dma_addr_t 335 * are certain non-highmem MIPS and x86 336 * configurations, but these configurations 337 * should never allocate physical pages above 32 338 * bits, so this is fine. Just in case, throw a 339 * warning and abort if we end up with an 340 * unrepresentable address. 
341 */ 342 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 343 free_pages_exact(queue, PAGE_ALIGN(size)); 344 return NULL; 345 } 346 } 347 return queue; 348 } 349 } 350 351 static void vring_free_queue(struct virtio_device *vdev, size_t size, 352 void *queue, dma_addr_t dma_handle, 353 struct device *dma_dev) 354 { 355 if (vring_use_dma_api(vdev)) 356 dma_free_coherent(dma_dev, size, queue, dma_handle); 357 else 358 free_pages_exact(queue, PAGE_ALIGN(size)); 359 } 360 361 /* 362 * The DMA ops on various arches are rather gnarly right now, and 363 * making all of the arch DMA ops work on the vring device itself 364 * is a mess. 365 */ 366 static struct device *vring_dma_dev(const struct vring_virtqueue *vq) 367 { 368 return vq->dma_dev; 369 } 370 371 /* Map one sg entry. */ 372 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, 373 enum dma_data_direction direction, dma_addr_t *addr, 374 u32 *len, bool premapped) 375 { 376 if (premapped) { 377 *addr = sg_dma_address(sg); 378 *len = sg_dma_len(sg); 379 return 0; 380 } 381 382 *len = sg->length; 383 384 if (!vq->use_dma_api) { 385 /* 386 * If DMA is not used, KMSAN doesn't know that the scatterlist 387 * is initialized by the hardware. Explicitly check/unpoison it 388 * depending on the direction. 389 */ 390 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); 391 *addr = (dma_addr_t)sg_phys(sg); 392 return 0; 393 } 394 395 /* 396 * We can't use dma_map_sg, because we don't use scatterlists in 397 * the way it expects (we don't guarantee that the scatterlist 398 * will exist for the lifetime of the mapping). 399 */ 400 *addr = dma_map_page(vring_dma_dev(vq), 401 sg_page(sg), sg->offset, sg->length, 402 direction); 403 404 if (dma_mapping_error(vring_dma_dev(vq), *addr)) 405 return -ENOMEM; 406 407 return 0; 408 } 409 410 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 411 void *cpu_addr, size_t size, 412 enum dma_data_direction direction) 413 { 414 if (!vq->use_dma_api) 415 return (dma_addr_t)virt_to_phys(cpu_addr); 416 417 return dma_map_single(vring_dma_dev(vq), 418 cpu_addr, size, direction); 419 } 420 421 static int vring_mapping_error(const struct vring_virtqueue *vq, 422 dma_addr_t addr) 423 { 424 if (!vq->use_dma_api) 425 return 0; 426 427 return dma_mapping_error(vring_dma_dev(vq), addr); 428 } 429 430 static void virtqueue_init(struct vring_virtqueue *vq, u32 num) 431 { 432 vq->vq.num_free = num; 433 434 if (vq->packed_ring) 435 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 436 else 437 vq->last_used_idx = 0; 438 439 vq->event_triggered = false; 440 vq->num_added = 0; 441 442 #ifdef DEBUG 443 vq->in_use = false; 444 vq->last_add_time_valid = false; 445 #endif 446 } 447 448 449 /* 450 * Split ring specific functions - *_split(). 451 */ 452 453 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 454 struct vring_desc_extra *extra) 455 { 456 u16 flags; 457 458 flags = extra->flags; 459 460 if (flags & VRING_DESC_F_INDIRECT) { 461 if (!vq->use_dma_api) 462 goto out; 463 464 dma_unmap_single(vring_dma_dev(vq), 465 extra->addr, 466 extra->len, 467 (flags & VRING_DESC_F_WRITE) ? 468 DMA_FROM_DEVICE : DMA_TO_DEVICE); 469 } else { 470 if (!vring_need_unmap_buffer(vq, extra)) 471 goto out; 472 473 dma_unmap_page(vring_dma_dev(vq), 474 extra->addr, 475 extra->len, 476 (flags & VRING_DESC_F_WRITE) ? 
477 DMA_FROM_DEVICE : DMA_TO_DEVICE); 478 } 479 480 out: 481 return extra->next; 482 } 483 484 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 485 unsigned int total_sg, 486 gfp_t gfp) 487 { 488 struct vring_desc_extra *extra; 489 struct vring_desc *desc; 490 unsigned int i, size; 491 492 /* 493 * We require lowmem mappings for the descriptors because 494 * otherwise virt_to_phys will give us bogus addresses in the 495 * virtqueue. 496 */ 497 gfp &= ~__GFP_HIGHMEM; 498 499 size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg; 500 501 desc = kmalloc(size, gfp); 502 if (!desc) 503 return NULL; 504 505 extra = (struct vring_desc_extra *)&desc[total_sg]; 506 507 for (i = 0; i < total_sg; i++) 508 extra[i].next = i + 1; 509 510 return desc; 511 } 512 513 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 514 struct vring_desc *desc, 515 struct vring_desc_extra *extra, 516 unsigned int i, 517 dma_addr_t addr, 518 unsigned int len, 519 u16 flags, bool premapped) 520 { 521 u16 next; 522 523 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 524 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 525 desc[i].len = cpu_to_virtio32(vq->vdev, len); 526 527 extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; 528 extra[i].len = len; 529 extra[i].flags = flags; 530 531 next = extra[i].next; 532 533 desc[i].next = cpu_to_virtio16(vq->vdev, next); 534 535 return next; 536 } 537 538 static inline int virtqueue_add_split(struct virtqueue *_vq, 539 struct scatterlist *sgs[], 540 unsigned int total_sg, 541 unsigned int out_sgs, 542 unsigned int in_sgs, 543 void *data, 544 void *ctx, 545 bool premapped, 546 gfp_t gfp) 547 { 548 struct vring_virtqueue *vq = to_vvq(_vq); 549 struct vring_desc_extra *extra; 550 struct scatterlist *sg; 551 struct vring_desc *desc; 552 unsigned int i, n, avail, descs_used, prev, err_idx; 553 int head; 554 bool indirect; 555 556 START_USE(vq); 557 558 BUG_ON(data == NULL); 559 BUG_ON(ctx && vq->indirect); 560 561 if (unlikely(vq->broken)) { 562 END_USE(vq); 563 return -EIO; 564 } 565 566 LAST_ADD_TIME_UPDATE(vq); 567 568 BUG_ON(total_sg == 0); 569 570 head = vq->free_head; 571 572 if (virtqueue_use_indirect(vq, total_sg)) 573 desc = alloc_indirect_split(_vq, total_sg, gfp); 574 else { 575 desc = NULL; 576 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 577 } 578 579 if (desc) { 580 /* Use a single buffer which doesn't continue */ 581 indirect = true; 582 /* Set up rest to use this indirect table. */ 583 i = 0; 584 descs_used = 1; 585 extra = (struct vring_desc_extra *)&desc[total_sg]; 586 } else { 587 indirect = false; 588 desc = vq->split.vring.desc; 589 extra = vq->split.desc_extra; 590 i = head; 591 descs_used = total_sg; 592 } 593 594 if (unlikely(vq->vq.num_free < descs_used)) { 595 pr_debug("Can't add buf len %i - avail = %i\n", 596 descs_used, vq->vq.num_free); 597 /* FIXME: for historical reasons, we force a notify here if 598 * there are outgoing parts to the buffer. Presumably the 599 * host should service the ring ASAP. */ 600 if (out_sgs) 601 vq->notify(&vq->vq); 602 if (indirect) 603 kfree(desc); 604 END_USE(vq); 605 return -ENOSPC; 606 } 607 608 for (n = 0; n < out_sgs; n++) { 609 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 610 dma_addr_t addr; 611 u32 len; 612 613 if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len, premapped)) 614 goto unmap_release; 615 616 prev = i; 617 /* Note that we trust indirect descriptor 618 * table since it use stream DMA mapping. 
619 */ 620 i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len, 621 VRING_DESC_F_NEXT, 622 premapped); 623 } 624 } 625 for (; n < (out_sgs + in_sgs); n++) { 626 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 627 dma_addr_t addr; 628 u32 len; 629 630 if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len, premapped)) 631 goto unmap_release; 632 633 prev = i; 634 /* Note that we trust indirect descriptor 635 * table since it use stream DMA mapping. 636 */ 637 i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len, 638 VRING_DESC_F_NEXT | 639 VRING_DESC_F_WRITE, 640 premapped); 641 } 642 } 643 /* Last one doesn't continue. */ 644 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 645 if (!indirect && vring_need_unmap_buffer(vq, &extra[prev])) 646 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 647 ~VRING_DESC_F_NEXT; 648 649 if (indirect) { 650 /* Now that the indirect table is filled in, map it. */ 651 dma_addr_t addr = vring_map_single( 652 vq, desc, total_sg * sizeof(struct vring_desc), 653 DMA_TO_DEVICE); 654 if (vring_mapping_error(vq, addr)) 655 goto unmap_release; 656 657 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 658 vq->split.desc_extra, 659 head, addr, 660 total_sg * sizeof(struct vring_desc), 661 VRING_DESC_F_INDIRECT, false); 662 } 663 664 /* We're using some buffers from the free list. */ 665 vq->vq.num_free -= descs_used; 666 667 /* Update free pointer */ 668 if (indirect) 669 vq->free_head = vq->split.desc_extra[head].next; 670 else 671 vq->free_head = i; 672 673 /* Store token and indirect buffer state. */ 674 vq->split.desc_state[head].data = data; 675 if (indirect) 676 vq->split.desc_state[head].indir_desc = desc; 677 else 678 vq->split.desc_state[head].indir_desc = ctx; 679 680 /* Put entry in available array (but don't update avail->idx until they 681 * do sync). */ 682 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 683 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 684 685 /* Descriptors and available array need to be set before we expose the 686 * new available array entries. */ 687 virtio_wmb(vq->weak_barriers); 688 vq->split.avail_idx_shadow++; 689 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 690 vq->split.avail_idx_shadow); 691 vq->num_added++; 692 693 pr_debug("Added buffer head %i to %p\n", head, vq); 694 END_USE(vq); 695 696 /* This is very unlikely, but theoretically possible. Kick 697 * just in case. */ 698 if (unlikely(vq->num_added == (1 << 16) - 1)) 699 virtqueue_kick(_vq); 700 701 return 0; 702 703 unmap_release: 704 err_idx = i; 705 706 if (indirect) 707 i = 0; 708 else 709 i = head; 710 711 for (n = 0; n < total_sg; n++) { 712 if (i == err_idx) 713 break; 714 715 i = vring_unmap_one_split(vq, &extra[i]); 716 } 717 718 if (indirect) 719 kfree(desc); 720 721 END_USE(vq); 722 return -ENOMEM; 723 } 724 725 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 726 { 727 struct vring_virtqueue *vq = to_vvq(_vq); 728 u16 new, old; 729 bool needs_kick; 730 731 START_USE(vq); 732 /* We need to expose available array entries before checking avail 733 * event. 
*/ 734 virtio_mb(vq->weak_barriers); 735 736 old = vq->split.avail_idx_shadow - vq->num_added; 737 new = vq->split.avail_idx_shadow; 738 vq->num_added = 0; 739 740 LAST_ADD_TIME_CHECK(vq); 741 LAST_ADD_TIME_INVALID(vq); 742 743 if (vq->event) { 744 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 745 vring_avail_event(&vq->split.vring)), 746 new, old); 747 } else { 748 needs_kick = !(vq->split.vring.used->flags & 749 cpu_to_virtio16(_vq->vdev, 750 VRING_USED_F_NO_NOTIFY)); 751 } 752 END_USE(vq); 753 return needs_kick; 754 } 755 756 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 757 void **ctx) 758 { 759 struct vring_desc_extra *extra; 760 unsigned int i, j; 761 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 762 763 /* Clear data ptr. */ 764 vq->split.desc_state[head].data = NULL; 765 766 extra = vq->split.desc_extra; 767 768 /* Put back on free list: unmap first-level descriptors and find end */ 769 i = head; 770 771 while (vq->split.vring.desc[i].flags & nextflag) { 772 vring_unmap_one_split(vq, &extra[i]); 773 i = vq->split.desc_extra[i].next; 774 vq->vq.num_free++; 775 } 776 777 vring_unmap_one_split(vq, &extra[i]); 778 vq->split.desc_extra[i].next = vq->free_head; 779 vq->free_head = head; 780 781 /* Plus final descriptor */ 782 vq->vq.num_free++; 783 784 if (vq->indirect) { 785 struct vring_desc *indir_desc = 786 vq->split.desc_state[head].indir_desc; 787 u32 len, num; 788 789 /* Free the indirect table, if any, now that it's unmapped. */ 790 if (!indir_desc) 791 return; 792 len = vq->split.desc_extra[head].len; 793 794 BUG_ON(!(vq->split.desc_extra[head].flags & 795 VRING_DESC_F_INDIRECT)); 796 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 797 798 num = len / sizeof(struct vring_desc); 799 800 extra = (struct vring_desc_extra *)&indir_desc[num]; 801 802 if (vq->use_dma_api) { 803 for (j = 0; j < num; j++) 804 vring_unmap_one_split(vq, &extra[j]); 805 } 806 807 kfree(indir_desc); 808 vq->split.desc_state[head].indir_desc = NULL; 809 } else if (ctx) { 810 *ctx = vq->split.desc_state[head].indir_desc; 811 } 812 } 813 814 static bool more_used_split(const struct vring_virtqueue *vq) 815 { 816 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 817 vq->split.vring.used->idx); 818 } 819 820 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 821 unsigned int *len, 822 void **ctx) 823 { 824 struct vring_virtqueue *vq = to_vvq(_vq); 825 void *ret; 826 unsigned int i; 827 u16 last_used; 828 829 START_USE(vq); 830 831 if (unlikely(vq->broken)) { 832 END_USE(vq); 833 return NULL; 834 } 835 836 if (!more_used_split(vq)) { 837 pr_debug("No more buffers in queue\n"); 838 END_USE(vq); 839 return NULL; 840 } 841 842 /* Only get used array entries after they have been exposed by host. */ 843 virtio_rmb(vq->weak_barriers); 844 845 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 846 i = virtio32_to_cpu(_vq->vdev, 847 vq->split.vring.used->ring[last_used].id); 848 *len = virtio32_to_cpu(_vq->vdev, 849 vq->split.vring.used->ring[last_used].len); 850 851 if (unlikely(i >= vq->split.vring.num)) { 852 BAD_RING(vq, "id %u out of range\n", i); 853 return NULL; 854 } 855 if (unlikely(!vq->split.desc_state[i].data)) { 856 BAD_RING(vq, "id %u is not a head!\n", i); 857 return NULL; 858 } 859 860 /* detach_buf_split clears data, so grab it now. 
*/ 861 ret = vq->split.desc_state[i].data; 862 detach_buf_split(vq, i, ctx); 863 vq->last_used_idx++; 864 /* If we expect an interrupt for the next entry, tell host 865 * by writing event index and flush out the write before 866 * the read in the next get_buf call. */ 867 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 868 virtio_store_mb(vq->weak_barriers, 869 &vring_used_event(&vq->split.vring), 870 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 871 872 LAST_ADD_TIME_INVALID(vq); 873 874 END_USE(vq); 875 return ret; 876 } 877 878 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 879 { 880 struct vring_virtqueue *vq = to_vvq(_vq); 881 882 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 883 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 884 885 /* 886 * If device triggered an event already it won't trigger one again: 887 * no need to disable. 888 */ 889 if (vq->event_triggered) 890 return; 891 892 if (vq->event) 893 /* TODO: this is a hack. Figure out a cleaner value to write. */ 894 vring_used_event(&vq->split.vring) = 0x0; 895 else 896 vq->split.vring.avail->flags = 897 cpu_to_virtio16(_vq->vdev, 898 vq->split.avail_flags_shadow); 899 } 900 } 901 902 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 903 { 904 struct vring_virtqueue *vq = to_vvq(_vq); 905 u16 last_used_idx; 906 907 START_USE(vq); 908 909 /* We optimistically turn back on interrupts, then check if there was 910 * more to do. */ 911 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 912 * either clear the flags bit or point the event index at the next 913 * entry. Always do both to keep code simple. */ 914 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 915 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 916 if (!vq->event) 917 vq->split.vring.avail->flags = 918 cpu_to_virtio16(_vq->vdev, 919 vq->split.avail_flags_shadow); 920 } 921 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 922 last_used_idx = vq->last_used_idx); 923 END_USE(vq); 924 return last_used_idx; 925 } 926 927 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) 928 { 929 struct vring_virtqueue *vq = to_vvq(_vq); 930 931 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 932 vq->split.vring.used->idx); 933 } 934 935 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 936 { 937 struct vring_virtqueue *vq = to_vvq(_vq); 938 u16 bufs; 939 940 START_USE(vq); 941 942 /* We optimistically turn back on interrupts, then check if there was 943 * more to do. */ 944 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 945 * either clear the flags bit or point the event index at the next 946 * entry. Always update the event index to keep code simple. 
*/ 947 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 948 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 949 if (!vq->event) 950 vq->split.vring.avail->flags = 951 cpu_to_virtio16(_vq->vdev, 952 vq->split.avail_flags_shadow); 953 } 954 /* TODO: tune this threshold */ 955 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 956 957 virtio_store_mb(vq->weak_barriers, 958 &vring_used_event(&vq->split.vring), 959 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 960 961 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 962 - vq->last_used_idx) > bufs)) { 963 END_USE(vq); 964 return false; 965 } 966 967 END_USE(vq); 968 return true; 969 } 970 971 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 972 { 973 struct vring_virtqueue *vq = to_vvq(_vq); 974 unsigned int i; 975 void *buf; 976 977 START_USE(vq); 978 979 for (i = 0; i < vq->split.vring.num; i++) { 980 if (!vq->split.desc_state[i].data) 981 continue; 982 /* detach_buf_split clears data, so grab it now. */ 983 buf = vq->split.desc_state[i].data; 984 detach_buf_split(vq, i, NULL); 985 vq->split.avail_idx_shadow--; 986 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 987 vq->split.avail_idx_shadow); 988 END_USE(vq); 989 return buf; 990 } 991 /* That should have freed everything. */ 992 BUG_ON(vq->vq.num_free != vq->split.vring.num); 993 994 END_USE(vq); 995 return NULL; 996 } 997 998 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, 999 struct vring_virtqueue *vq) 1000 { 1001 struct virtio_device *vdev; 1002 1003 vdev = vq->vq.vdev; 1004 1005 vring_split->avail_flags_shadow = 0; 1006 vring_split->avail_idx_shadow = 0; 1007 1008 /* No callback? Tell other side not to bother us. */ 1009 if (!vq->vq.callback) { 1010 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 1011 if (!vq->event) 1012 vring_split->vring.avail->flags = cpu_to_virtio16(vdev, 1013 vring_split->avail_flags_shadow); 1014 } 1015 } 1016 1017 static void virtqueue_reinit_split(struct vring_virtqueue *vq) 1018 { 1019 int num; 1020 1021 num = vq->split.vring.num; 1022 1023 vq->split.vring.avail->flags = 0; 1024 vq->split.vring.avail->idx = 0; 1025 1026 /* reset avail event */ 1027 vq->split.vring.avail->ring[num] = 0; 1028 1029 vq->split.vring.used->flags = 0; 1030 vq->split.vring.used->idx = 0; 1031 1032 /* reset used event */ 1033 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; 1034 1035 virtqueue_init(vq, num); 1036 1037 virtqueue_vring_init_split(&vq->split, vq); 1038 } 1039 1040 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, 1041 struct vring_virtqueue_split *vring_split) 1042 { 1043 vq->split = *vring_split; 1044 1045 /* Put everything in free lists. 
*/ 1046 vq->free_head = 0; 1047 } 1048 1049 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) 1050 { 1051 struct vring_desc_state_split *state; 1052 struct vring_desc_extra *extra; 1053 u32 num = vring_split->vring.num; 1054 1055 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); 1056 if (!state) 1057 goto err_state; 1058 1059 extra = vring_alloc_desc_extra(num); 1060 if (!extra) 1061 goto err_extra; 1062 1063 memset(state, 0, num * sizeof(struct vring_desc_state_split)); 1064 1065 vring_split->desc_state = state; 1066 vring_split->desc_extra = extra; 1067 return 0; 1068 1069 err_extra: 1070 kfree(state); 1071 err_state: 1072 return -ENOMEM; 1073 } 1074 1075 static void vring_free_split(struct vring_virtqueue_split *vring_split, 1076 struct virtio_device *vdev, struct device *dma_dev) 1077 { 1078 vring_free_queue(vdev, vring_split->queue_size_in_bytes, 1079 vring_split->vring.desc, 1080 vring_split->queue_dma_addr, 1081 dma_dev); 1082 1083 kfree(vring_split->desc_state); 1084 kfree(vring_split->desc_extra); 1085 } 1086 1087 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, 1088 struct virtio_device *vdev, 1089 u32 num, 1090 unsigned int vring_align, 1091 bool may_reduce_num, 1092 struct device *dma_dev) 1093 { 1094 void *queue = NULL; 1095 dma_addr_t dma_addr; 1096 1097 /* We assume num is a power of 2. */ 1098 if (!is_power_of_2(num)) { 1099 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1100 return -EINVAL; 1101 } 1102 1103 /* TODO: allocate each queue chunk individually */ 1104 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1105 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1106 &dma_addr, 1107 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1108 dma_dev); 1109 if (queue) 1110 break; 1111 if (!may_reduce_num) 1112 return -ENOMEM; 1113 } 1114 1115 if (!num) 1116 return -ENOMEM; 1117 1118 if (!queue) { 1119 /* Try to get a single page. You are my only hope! 
*/ 1120 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1121 &dma_addr, GFP_KERNEL | __GFP_ZERO, 1122 dma_dev); 1123 } 1124 if (!queue) 1125 return -ENOMEM; 1126 1127 vring_init(&vring_split->vring, num, queue, vring_align); 1128 1129 vring_split->queue_dma_addr = dma_addr; 1130 vring_split->queue_size_in_bytes = vring_size(num, vring_align); 1131 1132 vring_split->vring_align = vring_align; 1133 vring_split->may_reduce_num = may_reduce_num; 1134 1135 return 0; 1136 } 1137 1138 static struct virtqueue *vring_create_virtqueue_split( 1139 unsigned int index, 1140 unsigned int num, 1141 unsigned int vring_align, 1142 struct virtio_device *vdev, 1143 bool weak_barriers, 1144 bool may_reduce_num, 1145 bool context, 1146 bool (*notify)(struct virtqueue *), 1147 void (*callback)(struct virtqueue *), 1148 const char *name, 1149 struct device *dma_dev) 1150 { 1151 struct vring_virtqueue_split vring_split = {}; 1152 struct virtqueue *vq; 1153 int err; 1154 1155 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, 1156 may_reduce_num, dma_dev); 1157 if (err) 1158 return NULL; 1159 1160 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 1161 context, notify, callback, name, dma_dev); 1162 if (!vq) { 1163 vring_free_split(&vring_split, vdev, dma_dev); 1164 return NULL; 1165 } 1166 1167 to_vvq(vq)->we_own_ring = true; 1168 1169 return vq; 1170 } 1171 1172 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) 1173 { 1174 struct vring_virtqueue_split vring_split = {}; 1175 struct vring_virtqueue *vq = to_vvq(_vq); 1176 struct virtio_device *vdev = _vq->vdev; 1177 int err; 1178 1179 err = vring_alloc_queue_split(&vring_split, vdev, num, 1180 vq->split.vring_align, 1181 vq->split.may_reduce_num, 1182 vring_dma_dev(vq)); 1183 if (err) 1184 goto err; 1185 1186 err = vring_alloc_state_extra_split(&vring_split); 1187 if (err) 1188 goto err_state_extra; 1189 1190 vring_free(&vq->vq); 1191 1192 virtqueue_vring_init_split(&vring_split, vq); 1193 1194 virtqueue_init(vq, vring_split.vring.num); 1195 virtqueue_vring_attach_split(vq, &vring_split); 1196 1197 return 0; 1198 1199 err_state_extra: 1200 vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); 1201 err: 1202 virtqueue_reinit_split(vq); 1203 return -ENOMEM; 1204 } 1205 1206 1207 /* 1208 * Packed ring specific functions - *_packed(). 1209 */ 1210 static bool packed_used_wrap_counter(u16 last_used_idx) 1211 { 1212 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1213 } 1214 1215 static u16 packed_last_used(u16 last_used_idx) 1216 { 1217 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1218 } 1219 1220 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1221 const struct vring_desc_extra *extra) 1222 { 1223 u16 flags; 1224 1225 flags = extra->flags; 1226 1227 if (flags & VRING_DESC_F_INDIRECT) { 1228 if (!vq->use_dma_api) 1229 return; 1230 1231 dma_unmap_single(vring_dma_dev(vq), 1232 extra->addr, extra->len, 1233 (flags & VRING_DESC_F_WRITE) ? 1234 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1235 } else { 1236 if (!vring_need_unmap_buffer(vq, extra)) 1237 return; 1238 1239 dma_unmap_page(vring_dma_dev(vq), 1240 extra->addr, extra->len, 1241 (flags & VRING_DESC_F_WRITE) ? 
1242 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1243 } 1244 } 1245 1246 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1247 gfp_t gfp) 1248 { 1249 struct vring_desc_extra *extra; 1250 struct vring_packed_desc *desc; 1251 int i, size; 1252 1253 /* 1254 * We require lowmem mappings for the descriptors because 1255 * otherwise virt_to_phys will give us bogus addresses in the 1256 * virtqueue. 1257 */ 1258 gfp &= ~__GFP_HIGHMEM; 1259 1260 size = (sizeof(*desc) + sizeof(*extra)) * total_sg; 1261 1262 desc = kmalloc(size, gfp); 1263 if (!desc) 1264 return NULL; 1265 1266 extra = (struct vring_desc_extra *)&desc[total_sg]; 1267 1268 for (i = 0; i < total_sg; i++) 1269 extra[i].next = i + 1; 1270 1271 return desc; 1272 } 1273 1274 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1275 struct scatterlist *sgs[], 1276 unsigned int total_sg, 1277 unsigned int out_sgs, 1278 unsigned int in_sgs, 1279 void *data, 1280 bool premapped, 1281 gfp_t gfp) 1282 { 1283 struct vring_desc_extra *extra; 1284 struct vring_packed_desc *desc; 1285 struct scatterlist *sg; 1286 unsigned int i, n, err_idx, len; 1287 u16 head, id; 1288 dma_addr_t addr; 1289 1290 head = vq->packed.next_avail_idx; 1291 desc = alloc_indirect_packed(total_sg, gfp); 1292 if (!desc) 1293 return -ENOMEM; 1294 1295 extra = (struct vring_desc_extra *)&desc[total_sg]; 1296 1297 if (unlikely(vq->vq.num_free < 1)) { 1298 pr_debug("Can't add buf len 1 - avail = 0\n"); 1299 kfree(desc); 1300 END_USE(vq); 1301 return -ENOSPC; 1302 } 1303 1304 i = 0; 1305 id = vq->free_head; 1306 BUG_ON(id == vq->packed.vring.num); 1307 1308 for (n = 0; n < out_sgs + in_sgs; n++) { 1309 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1310 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1311 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1312 &addr, &len, premapped)) 1313 goto unmap_release; 1314 1315 desc[i].flags = cpu_to_le16(n < out_sgs ? 1316 0 : VRING_DESC_F_WRITE); 1317 desc[i].addr = cpu_to_le64(addr); 1318 desc[i].len = cpu_to_le32(len); 1319 1320 if (unlikely(vq->use_dma_api)) { 1321 extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; 1322 extra[i].len = len; 1323 extra[i].flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE; 1324 } 1325 1326 i++; 1327 } 1328 } 1329 1330 /* Now that the indirect table is filled in, map it. */ 1331 addr = vring_map_single(vq, desc, 1332 total_sg * sizeof(struct vring_packed_desc), 1333 DMA_TO_DEVICE); 1334 if (vring_mapping_error(vq, addr)) 1335 goto unmap_release; 1336 1337 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1338 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1339 sizeof(struct vring_packed_desc)); 1340 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1341 1342 if (vq->use_dma_api) { 1343 vq->packed.desc_extra[id].addr = addr; 1344 vq->packed.desc_extra[id].len = total_sg * 1345 sizeof(struct vring_packed_desc); 1346 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1347 vq->packed.avail_used_flags; 1348 } 1349 1350 /* 1351 * A driver MUST NOT make the first descriptor in the list 1352 * available before all subsequent descriptors comprising 1353 * the list are made available. 1354 */ 1355 virtio_wmb(vq->weak_barriers); 1356 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1357 vq->packed.avail_used_flags); 1358 1359 /* We're using some buffers from the free list. 
*/ 1360 vq->vq.num_free -= 1; 1361 1362 /* Update free pointer */ 1363 n = head + 1; 1364 if (n >= vq->packed.vring.num) { 1365 n = 0; 1366 vq->packed.avail_wrap_counter ^= 1; 1367 vq->packed.avail_used_flags ^= 1368 1 << VRING_PACKED_DESC_F_AVAIL | 1369 1 << VRING_PACKED_DESC_F_USED; 1370 } 1371 vq->packed.next_avail_idx = n; 1372 vq->free_head = vq->packed.desc_extra[id].next; 1373 1374 /* Store token and indirect buffer state. */ 1375 vq->packed.desc_state[id].num = 1; 1376 vq->packed.desc_state[id].data = data; 1377 vq->packed.desc_state[id].indir_desc = desc; 1378 vq->packed.desc_state[id].last = id; 1379 1380 vq->num_added += 1; 1381 1382 pr_debug("Added buffer head %i to %p\n", head, vq); 1383 END_USE(vq); 1384 1385 return 0; 1386 1387 unmap_release: 1388 err_idx = i; 1389 1390 for (i = 0; i < err_idx; i++) 1391 vring_unmap_extra_packed(vq, &extra[i]); 1392 1393 kfree(desc); 1394 1395 END_USE(vq); 1396 return -ENOMEM; 1397 } 1398 1399 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1400 struct scatterlist *sgs[], 1401 unsigned int total_sg, 1402 unsigned int out_sgs, 1403 unsigned int in_sgs, 1404 void *data, 1405 void *ctx, 1406 bool premapped, 1407 gfp_t gfp) 1408 { 1409 struct vring_virtqueue *vq = to_vvq(_vq); 1410 struct vring_packed_desc *desc; 1411 struct scatterlist *sg; 1412 unsigned int i, n, c, descs_used, err_idx, len; 1413 __le16 head_flags, flags; 1414 u16 head, id, prev, curr, avail_used_flags; 1415 int err; 1416 1417 START_USE(vq); 1418 1419 BUG_ON(data == NULL); 1420 BUG_ON(ctx && vq->indirect); 1421 1422 if (unlikely(vq->broken)) { 1423 END_USE(vq); 1424 return -EIO; 1425 } 1426 1427 LAST_ADD_TIME_UPDATE(vq); 1428 1429 BUG_ON(total_sg == 0); 1430 1431 if (virtqueue_use_indirect(vq, total_sg)) { 1432 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1433 in_sgs, data, premapped, gfp); 1434 if (err != -ENOMEM) { 1435 END_USE(vq); 1436 return err; 1437 } 1438 1439 /* fall back on direct */ 1440 } 1441 1442 head = vq->packed.next_avail_idx; 1443 avail_used_flags = vq->packed.avail_used_flags; 1444 1445 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1446 1447 desc = vq->packed.vring.desc; 1448 i = head; 1449 descs_used = total_sg; 1450 1451 if (unlikely(vq->vq.num_free < descs_used)) { 1452 pr_debug("Can't add buf len %i - avail = %i\n", 1453 descs_used, vq->vq.num_free); 1454 END_USE(vq); 1455 return -ENOSPC; 1456 } 1457 1458 id = vq->free_head; 1459 BUG_ON(id == vq->packed.vring.num); 1460 1461 curr = id; 1462 c = 0; 1463 for (n = 0; n < out_sgs + in_sgs; n++) { 1464 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1465 dma_addr_t addr; 1466 1467 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1468 DMA_TO_DEVICE : DMA_FROM_DEVICE, 1469 &addr, &len, premapped)) 1470 goto unmap_release; 1471 1472 flags = cpu_to_le16(vq->packed.avail_used_flags | 1473 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1474 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1475 if (i == head) 1476 head_flags = flags; 1477 else 1478 desc[i].flags = flags; 1479 1480 desc[i].addr = cpu_to_le64(addr); 1481 desc[i].len = cpu_to_le32(len); 1482 desc[i].id = cpu_to_le16(id); 1483 1484 if (unlikely(vq->use_dma_api)) { 1485 vq->packed.desc_extra[curr].addr = premapped ? 
1486 DMA_MAPPING_ERROR : addr; 1487 vq->packed.desc_extra[curr].len = len; 1488 vq->packed.desc_extra[curr].flags = 1489 le16_to_cpu(flags); 1490 } 1491 prev = curr; 1492 curr = vq->packed.desc_extra[curr].next; 1493 1494 if ((unlikely(++i >= vq->packed.vring.num))) { 1495 i = 0; 1496 vq->packed.avail_used_flags ^= 1497 1 << VRING_PACKED_DESC_F_AVAIL | 1498 1 << VRING_PACKED_DESC_F_USED; 1499 } 1500 } 1501 } 1502 1503 if (i <= head) 1504 vq->packed.avail_wrap_counter ^= 1; 1505 1506 /* We're using some buffers from the free list. */ 1507 vq->vq.num_free -= descs_used; 1508 1509 /* Update free pointer */ 1510 vq->packed.next_avail_idx = i; 1511 vq->free_head = curr; 1512 1513 /* Store token. */ 1514 vq->packed.desc_state[id].num = descs_used; 1515 vq->packed.desc_state[id].data = data; 1516 vq->packed.desc_state[id].indir_desc = ctx; 1517 vq->packed.desc_state[id].last = prev; 1518 1519 /* 1520 * A driver MUST NOT make the first descriptor in the list 1521 * available before all subsequent descriptors comprising 1522 * the list are made available. 1523 */ 1524 virtio_wmb(vq->weak_barriers); 1525 vq->packed.vring.desc[head].flags = head_flags; 1526 vq->num_added += descs_used; 1527 1528 pr_debug("Added buffer head %i to %p\n", head, vq); 1529 END_USE(vq); 1530 1531 return 0; 1532 1533 unmap_release: 1534 err_idx = i; 1535 i = head; 1536 curr = vq->free_head; 1537 1538 vq->packed.avail_used_flags = avail_used_flags; 1539 1540 for (n = 0; n < total_sg; n++) { 1541 if (i == err_idx) 1542 break; 1543 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1544 curr = vq->packed.desc_extra[curr].next; 1545 i++; 1546 if (i >= vq->packed.vring.num) 1547 i = 0; 1548 } 1549 1550 END_USE(vq); 1551 return -EIO; 1552 } 1553 1554 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1555 { 1556 struct vring_virtqueue *vq = to_vvq(_vq); 1557 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1558 bool needs_kick; 1559 union { 1560 struct { 1561 __le16 off_wrap; 1562 __le16 flags; 1563 }; 1564 u32 u32; 1565 } snapshot; 1566 1567 START_USE(vq); 1568 1569 /* 1570 * We need to expose the new flags value before checking notification 1571 * suppressions. 1572 */ 1573 virtio_mb(vq->weak_barriers); 1574 1575 old = vq->packed.next_avail_idx - vq->num_added; 1576 new = vq->packed.next_avail_idx; 1577 vq->num_added = 0; 1578 1579 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1580 flags = le16_to_cpu(snapshot.flags); 1581 1582 LAST_ADD_TIME_CHECK(vq); 1583 LAST_ADD_TIME_INVALID(vq); 1584 1585 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1586 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1587 goto out; 1588 } 1589 1590 off_wrap = le16_to_cpu(snapshot.off_wrap); 1591 1592 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1593 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1594 if (wrap_counter != vq->packed.avail_wrap_counter) 1595 event_idx -= vq->packed.vring.num; 1596 1597 needs_kick = vring_need_event(event_idx, new, old); 1598 out: 1599 END_USE(vq); 1600 return needs_kick; 1601 } 1602 1603 static void detach_buf_packed(struct vring_virtqueue *vq, 1604 unsigned int id, void **ctx) 1605 { 1606 struct vring_desc_state_packed *state = NULL; 1607 struct vring_packed_desc *desc; 1608 unsigned int i, curr; 1609 1610 state = &vq->packed.desc_state[id]; 1611 1612 /* Clear data ptr. 
*/ 1613 state->data = NULL; 1614 1615 vq->packed.desc_extra[state->last].next = vq->free_head; 1616 vq->free_head = id; 1617 vq->vq.num_free += state->num; 1618 1619 if (unlikely(vq->use_dma_api)) { 1620 curr = id; 1621 for (i = 0; i < state->num; i++) { 1622 vring_unmap_extra_packed(vq, 1623 &vq->packed.desc_extra[curr]); 1624 curr = vq->packed.desc_extra[curr].next; 1625 } 1626 } 1627 1628 if (vq->indirect) { 1629 struct vring_desc_extra *extra; 1630 u32 len, num; 1631 1632 /* Free the indirect table, if any, now that it's unmapped. */ 1633 desc = state->indir_desc; 1634 if (!desc) 1635 return; 1636 1637 if (vq->use_dma_api) { 1638 len = vq->packed.desc_extra[id].len; 1639 num = len / sizeof(struct vring_packed_desc); 1640 1641 extra = (struct vring_desc_extra *)&desc[num]; 1642 1643 for (i = 0; i < num; i++) 1644 vring_unmap_extra_packed(vq, &extra[i]); 1645 } 1646 kfree(desc); 1647 state->indir_desc = NULL; 1648 } else if (ctx) { 1649 *ctx = state->indir_desc; 1650 } 1651 } 1652 1653 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1654 u16 idx, bool used_wrap_counter) 1655 { 1656 bool avail, used; 1657 u16 flags; 1658 1659 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1660 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1661 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1662 1663 return avail == used && used == used_wrap_counter; 1664 } 1665 1666 static bool more_used_packed(const struct vring_virtqueue *vq) 1667 { 1668 u16 last_used; 1669 u16 last_used_idx; 1670 bool used_wrap_counter; 1671 1672 last_used_idx = READ_ONCE(vq->last_used_idx); 1673 last_used = packed_last_used(last_used_idx); 1674 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1675 return is_used_desc_packed(vq, last_used, used_wrap_counter); 1676 } 1677 1678 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1679 unsigned int *len, 1680 void **ctx) 1681 { 1682 struct vring_virtqueue *vq = to_vvq(_vq); 1683 u16 last_used, id, last_used_idx; 1684 bool used_wrap_counter; 1685 void *ret; 1686 1687 START_USE(vq); 1688 1689 if (unlikely(vq->broken)) { 1690 END_USE(vq); 1691 return NULL; 1692 } 1693 1694 if (!more_used_packed(vq)) { 1695 pr_debug("No more buffers in queue\n"); 1696 END_USE(vq); 1697 return NULL; 1698 } 1699 1700 /* Only get used elements after they have been exposed by host. */ 1701 virtio_rmb(vq->weak_barriers); 1702 1703 last_used_idx = READ_ONCE(vq->last_used_idx); 1704 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1705 last_used = packed_last_used(last_used_idx); 1706 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1707 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1708 1709 if (unlikely(id >= vq->packed.vring.num)) { 1710 BAD_RING(vq, "id %u out of range\n", id); 1711 return NULL; 1712 } 1713 if (unlikely(!vq->packed.desc_state[id].data)) { 1714 BAD_RING(vq, "id %u is not a head!\n", id); 1715 return NULL; 1716 } 1717 1718 /* detach_buf_packed clears data, so grab it now. 
*/ 1719 ret = vq->packed.desc_state[id].data; 1720 detach_buf_packed(vq, id, ctx); 1721 1722 last_used += vq->packed.desc_state[id].num; 1723 if (unlikely(last_used >= vq->packed.vring.num)) { 1724 last_used -= vq->packed.vring.num; 1725 used_wrap_counter ^= 1; 1726 } 1727 1728 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1729 WRITE_ONCE(vq->last_used_idx, last_used); 1730 1731 /* 1732 * If we expect an interrupt for the next entry, tell host 1733 * by writing event index and flush out the write before 1734 * the read in the next get_buf call. 1735 */ 1736 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1737 virtio_store_mb(vq->weak_barriers, 1738 &vq->packed.vring.driver->off_wrap, 1739 cpu_to_le16(vq->last_used_idx)); 1740 1741 LAST_ADD_TIME_INVALID(vq); 1742 1743 END_USE(vq); 1744 return ret; 1745 } 1746 1747 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1748 { 1749 struct vring_virtqueue *vq = to_vvq(_vq); 1750 1751 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1752 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1753 1754 /* 1755 * If device triggered an event already it won't trigger one again: 1756 * no need to disable. 1757 */ 1758 if (vq->event_triggered) 1759 return; 1760 1761 vq->packed.vring.driver->flags = 1762 cpu_to_le16(vq->packed.event_flags_shadow); 1763 } 1764 } 1765 1766 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1767 { 1768 struct vring_virtqueue *vq = to_vvq(_vq); 1769 1770 START_USE(vq); 1771 1772 /* 1773 * We optimistically turn back on interrupts, then check if there was 1774 * more to do. 1775 */ 1776 1777 if (vq->event) { 1778 vq->packed.vring.driver->off_wrap = 1779 cpu_to_le16(vq->last_used_idx); 1780 /* 1781 * We need to update event offset and event wrap 1782 * counter first before updating event flags. 1783 */ 1784 virtio_wmb(vq->weak_barriers); 1785 } 1786 1787 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1788 vq->packed.event_flags_shadow = vq->event ? 1789 VRING_PACKED_EVENT_FLAG_DESC : 1790 VRING_PACKED_EVENT_FLAG_ENABLE; 1791 vq->packed.vring.driver->flags = 1792 cpu_to_le16(vq->packed.event_flags_shadow); 1793 } 1794 1795 END_USE(vq); 1796 return vq->last_used_idx; 1797 } 1798 1799 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1800 { 1801 struct vring_virtqueue *vq = to_vvq(_vq); 1802 bool wrap_counter; 1803 u16 used_idx; 1804 1805 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1806 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1807 1808 return is_used_desc_packed(vq, used_idx, wrap_counter); 1809 } 1810 1811 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1812 { 1813 struct vring_virtqueue *vq = to_vvq(_vq); 1814 u16 used_idx, wrap_counter, last_used_idx; 1815 u16 bufs; 1816 1817 START_USE(vq); 1818 1819 /* 1820 * We optimistically turn back on interrupts, then check if there was 1821 * more to do. 
1822 */ 1823 1824 if (vq->event) { 1825 /* TODO: tune this threshold */ 1826 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1827 last_used_idx = READ_ONCE(vq->last_used_idx); 1828 wrap_counter = packed_used_wrap_counter(last_used_idx); 1829 1830 used_idx = packed_last_used(last_used_idx) + bufs; 1831 if (used_idx >= vq->packed.vring.num) { 1832 used_idx -= vq->packed.vring.num; 1833 wrap_counter ^= 1; 1834 } 1835 1836 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1837 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1838 1839 /* 1840 * We need to update event offset and event wrap 1841 * counter first before updating event flags. 1842 */ 1843 virtio_wmb(vq->weak_barriers); 1844 } 1845 1846 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1847 vq->packed.event_flags_shadow = vq->event ? 1848 VRING_PACKED_EVENT_FLAG_DESC : 1849 VRING_PACKED_EVENT_FLAG_ENABLE; 1850 vq->packed.vring.driver->flags = 1851 cpu_to_le16(vq->packed.event_flags_shadow); 1852 } 1853 1854 /* 1855 * We need to update event suppression structure first 1856 * before re-checking for more used buffers. 1857 */ 1858 virtio_mb(vq->weak_barriers); 1859 1860 last_used_idx = READ_ONCE(vq->last_used_idx); 1861 wrap_counter = packed_used_wrap_counter(last_used_idx); 1862 used_idx = packed_last_used(last_used_idx); 1863 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1864 END_USE(vq); 1865 return false; 1866 } 1867 1868 END_USE(vq); 1869 return true; 1870 } 1871 1872 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1873 { 1874 struct vring_virtqueue *vq = to_vvq(_vq); 1875 unsigned int i; 1876 void *buf; 1877 1878 START_USE(vq); 1879 1880 for (i = 0; i < vq->packed.vring.num; i++) { 1881 if (!vq->packed.desc_state[i].data) 1882 continue; 1883 /* detach_buf clears data, so grab it now. */ 1884 buf = vq->packed.desc_state[i].data; 1885 detach_buf_packed(vq, i, NULL); 1886 END_USE(vq); 1887 return buf; 1888 } 1889 /* That should have freed everything. 
*/ 1890 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1891 1892 END_USE(vq); 1893 return NULL; 1894 } 1895 1896 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) 1897 { 1898 struct vring_desc_extra *desc_extra; 1899 unsigned int i; 1900 1901 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1902 GFP_KERNEL); 1903 if (!desc_extra) 1904 return NULL; 1905 1906 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1907 1908 for (i = 0; i < num - 1; i++) 1909 desc_extra[i].next = i + 1; 1910 1911 return desc_extra; 1912 } 1913 1914 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, 1915 struct virtio_device *vdev, 1916 struct device *dma_dev) 1917 { 1918 if (vring_packed->vring.desc) 1919 vring_free_queue(vdev, vring_packed->ring_size_in_bytes, 1920 vring_packed->vring.desc, 1921 vring_packed->ring_dma_addr, 1922 dma_dev); 1923 1924 if (vring_packed->vring.driver) 1925 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 1926 vring_packed->vring.driver, 1927 vring_packed->driver_event_dma_addr, 1928 dma_dev); 1929 1930 if (vring_packed->vring.device) 1931 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 1932 vring_packed->vring.device, 1933 vring_packed->device_event_dma_addr, 1934 dma_dev); 1935 1936 kfree(vring_packed->desc_state); 1937 kfree(vring_packed->desc_extra); 1938 } 1939 1940 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, 1941 struct virtio_device *vdev, 1942 u32 num, struct device *dma_dev) 1943 { 1944 struct vring_packed_desc *ring; 1945 struct vring_packed_desc_event *driver, *device; 1946 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1947 size_t ring_size_in_bytes, event_size_in_bytes; 1948 1949 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1950 1951 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1952 &ring_dma_addr, 1953 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1954 dma_dev); 1955 if (!ring) 1956 goto err; 1957 1958 vring_packed->vring.desc = ring; 1959 vring_packed->ring_dma_addr = ring_dma_addr; 1960 vring_packed->ring_size_in_bytes = ring_size_in_bytes; 1961 1962 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1963 1964 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1965 &driver_event_dma_addr, 1966 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1967 dma_dev); 1968 if (!driver) 1969 goto err; 1970 1971 vring_packed->vring.driver = driver; 1972 vring_packed->event_size_in_bytes = event_size_in_bytes; 1973 vring_packed->driver_event_dma_addr = driver_event_dma_addr; 1974 1975 device = vring_alloc_queue(vdev, event_size_in_bytes, 1976 &device_event_dma_addr, 1977 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1978 dma_dev); 1979 if (!device) 1980 goto err; 1981 1982 vring_packed->vring.device = device; 1983 vring_packed->device_event_dma_addr = device_event_dma_addr; 1984 1985 vring_packed->vring.num = num; 1986 1987 return 0; 1988 1989 err: 1990 vring_free_packed(vring_packed, vdev, dma_dev); 1991 return -ENOMEM; 1992 } 1993 1994 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) 1995 { 1996 struct vring_desc_state_packed *state; 1997 struct vring_desc_extra *extra; 1998 u32 num = vring_packed->vring.num; 1999 2000 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); 2001 if (!state) 2002 goto err_desc_state; 2003 2004 memset(state, 0, num * sizeof(struct vring_desc_state_packed)); 2005 2006 extra = vring_alloc_desc_extra(num); 2007 if (!extra) 
2008 goto err_desc_extra; 2009 2010 vring_packed->desc_state = state; 2011 vring_packed->desc_extra = extra; 2012 2013 return 0; 2014 2015 err_desc_extra: 2016 kfree(state); 2017 err_desc_state: 2018 return -ENOMEM; 2019 } 2020 2021 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, 2022 bool callback) 2023 { 2024 vring_packed->next_avail_idx = 0; 2025 vring_packed->avail_wrap_counter = 1; 2026 vring_packed->event_flags_shadow = 0; 2027 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 2028 2029 /* No callback? Tell other side not to bother us. */ 2030 if (!callback) { 2031 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 2032 vring_packed->vring.driver->flags = 2033 cpu_to_le16(vring_packed->event_flags_shadow); 2034 } 2035 } 2036 2037 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, 2038 struct vring_virtqueue_packed *vring_packed) 2039 { 2040 vq->packed = *vring_packed; 2041 2042 /* Put everything in free lists. */ 2043 vq->free_head = 0; 2044 } 2045 2046 static void virtqueue_reinit_packed(struct vring_virtqueue *vq) 2047 { 2048 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); 2049 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); 2050 2051 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */ 2052 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); 2053 2054 virtqueue_init(vq, vq->packed.vring.num); 2055 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); 2056 } 2057 2058 static struct virtqueue *vring_create_virtqueue_packed( 2059 unsigned int index, 2060 unsigned int num, 2061 unsigned int vring_align, 2062 struct virtio_device *vdev, 2063 bool weak_barriers, 2064 bool may_reduce_num, 2065 bool context, 2066 bool (*notify)(struct virtqueue *), 2067 void (*callback)(struct virtqueue *), 2068 const char *name, 2069 struct device *dma_dev) 2070 { 2071 struct vring_virtqueue_packed vring_packed = {}; 2072 struct vring_virtqueue *vq; 2073 int err; 2074 2075 if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) 2076 goto err_ring; 2077 2078 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2079 if (!vq) 2080 goto err_vq; 2081 2082 vq->vq.callback = callback; 2083 vq->vq.vdev = vdev; 2084 vq->vq.name = name; 2085 vq->vq.index = index; 2086 vq->vq.reset = false; 2087 vq->we_own_ring = true; 2088 vq->notify = notify; 2089 vq->weak_barriers = weak_barriers; 2090 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2091 vq->broken = true; 2092 #else 2093 vq->broken = false; 2094 #endif 2095 vq->packed_ring = true; 2096 vq->dma_dev = dma_dev; 2097 vq->use_dma_api = vring_use_dma_api(vdev); 2098 2099 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2100 !context; 2101 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2102 2103 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2104 vq->weak_barriers = false; 2105 2106 err = vring_alloc_state_extra_packed(&vring_packed); 2107 if (err) 2108 goto err_state_extra; 2109 2110 virtqueue_vring_init_packed(&vring_packed, !!callback); 2111 2112 virtqueue_init(vq, num); 2113 virtqueue_vring_attach_packed(vq, &vring_packed); 2114 2115 spin_lock(&vdev->vqs_list_lock); 2116 list_add_tail(&vq->vq.list, &vdev->vqs); 2117 spin_unlock(&vdev->vqs_list_lock); 2118 return &vq->vq; 2119 2120 err_state_extra: 2121 kfree(vq); 2122 err_vq: 2123 vring_free_packed(&vring_packed, vdev, dma_dev); 2124 err_ring: 2125 return NULL; 2126 } 2127 2128 static int 
virtqueue_resize_packed(struct virtqueue *_vq, u32 num) 2129 { 2130 struct vring_virtqueue_packed vring_packed = {}; 2131 struct vring_virtqueue *vq = to_vvq(_vq); 2132 struct virtio_device *vdev = _vq->vdev; 2133 int err; 2134 2135 if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) 2136 goto err_ring; 2137 2138 err = vring_alloc_state_extra_packed(&vring_packed); 2139 if (err) 2140 goto err_state_extra; 2141 2142 vring_free(&vq->vq); 2143 2144 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); 2145 2146 virtqueue_init(vq, vring_packed.vring.num); 2147 virtqueue_vring_attach_packed(vq, &vring_packed); 2148 2149 return 0; 2150 2151 err_state_extra: 2152 vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); 2153 err_ring: 2154 virtqueue_reinit_packed(vq); 2155 return -ENOMEM; 2156 } 2157 2158 static int virtqueue_disable_and_recycle(struct virtqueue *_vq, 2159 void (*recycle)(struct virtqueue *vq, void *buf)) 2160 { 2161 struct vring_virtqueue *vq = to_vvq(_vq); 2162 struct virtio_device *vdev = vq->vq.vdev; 2163 void *buf; 2164 int err; 2165 2166 if (!vq->we_own_ring) 2167 return -EPERM; 2168 2169 if (!vdev->config->disable_vq_and_reset) 2170 return -ENOENT; 2171 2172 if (!vdev->config->enable_vq_after_reset) 2173 return -ENOENT; 2174 2175 err = vdev->config->disable_vq_and_reset(_vq); 2176 if (err) 2177 return err; 2178 2179 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) 2180 recycle(_vq, buf); 2181 2182 return 0; 2183 } 2184 2185 static int virtqueue_enable_after_reset(struct virtqueue *_vq) 2186 { 2187 struct vring_virtqueue *vq = to_vvq(_vq); 2188 struct virtio_device *vdev = vq->vq.vdev; 2189 2190 if (vdev->config->enable_vq_after_reset(_vq)) 2191 return -EBUSY; 2192 2193 return 0; 2194 } 2195 2196 /* 2197 * Generic functions and exported symbols. 2198 */ 2199 2200 static inline int virtqueue_add(struct virtqueue *_vq, 2201 struct scatterlist *sgs[], 2202 unsigned int total_sg, 2203 unsigned int out_sgs, 2204 unsigned int in_sgs, 2205 void *data, 2206 void *ctx, 2207 bool premapped, 2208 gfp_t gfp) 2209 { 2210 struct vring_virtqueue *vq = to_vvq(_vq); 2211 2212 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 2213 out_sgs, in_sgs, data, ctx, premapped, gfp) : 2214 virtqueue_add_split(_vq, sgs, total_sg, 2215 out_sgs, in_sgs, data, ctx, premapped, gfp); 2216 } 2217 2218 /** 2219 * virtqueue_add_sgs - expose buffers to other end 2220 * @_vq: the struct virtqueue we're talking about. 2221 * @sgs: array of terminated scatterlists. 2222 * @out_sgs: the number of scatterlists readable by other side 2223 * @in_sgs: the number of scatterlists which are writable (after readable ones) 2224 * @data: the token identifying the buffer. 2225 * @gfp: how to do memory allocations (if necessary). 2226 * 2227 * Caller must ensure we don't call this with other virtqueue operations 2228 * at the same time (except where noted). 2229 * 2230 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2231 */ 2232 int virtqueue_add_sgs(struct virtqueue *_vq, 2233 struct scatterlist *sgs[], 2234 unsigned int out_sgs, 2235 unsigned int in_sgs, 2236 void *data, 2237 gfp_t gfp) 2238 { 2239 unsigned int i, total_sg = 0; 2240 2241 /* Count them first. 
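 * Each entry in sgs[] may itself be a chained scatterlist, so walk every
 * element with sg_next() to get the total entry count before adding.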
*/ 2242 for (i = 0; i < out_sgs + in_sgs; i++) { 2243 struct scatterlist *sg; 2244 2245 for (sg = sgs[i]; sg; sg = sg_next(sg)) 2246 total_sg++; 2247 } 2248 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 2249 data, NULL, false, gfp); 2250 } 2251 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 2252 2253 /** 2254 * virtqueue_add_outbuf - expose output buffers to other end 2255 * @vq: the struct virtqueue we're talking about. 2256 * @sg: scatterlist (must be well-formed and terminated!) 2257 * @num: the number of entries in @sg readable by other side 2258 * @data: the token identifying the buffer. 2259 * @gfp: how to do memory allocations (if necessary). 2260 * 2261 * Caller must ensure we don't call this with other virtqueue operations 2262 * at the same time (except where noted). 2263 * 2264 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2265 */ 2266 int virtqueue_add_outbuf(struct virtqueue *vq, 2267 struct scatterlist *sg, unsigned int num, 2268 void *data, 2269 gfp_t gfp) 2270 { 2271 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp); 2272 } 2273 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 2274 2275 /** 2276 * virtqueue_add_outbuf_premapped - expose output buffers to other end 2277 * @vq: the struct virtqueue we're talking about. 2278 * @sg: scatterlist (must be well-formed and terminated!) 2279 * @num: the number of entries in @sg readable by other side 2280 * @data: the token identifying the buffer. 2281 * @gfp: how to do memory allocations (if necessary). 2282 * 2283 * Caller must ensure we don't call this with other virtqueue operations 2284 * at the same time (except where noted). 2285 * 2286 * Return: 2287 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2288 */ 2289 int virtqueue_add_outbuf_premapped(struct virtqueue *vq, 2290 struct scatterlist *sg, unsigned int num, 2291 void *data, 2292 gfp_t gfp) 2293 { 2294 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp); 2295 } 2296 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped); 2297 2298 /** 2299 * virtqueue_add_inbuf - expose input buffers to other end 2300 * @vq: the struct virtqueue we're talking about. 2301 * @sg: scatterlist (must be well-formed and terminated!) 2302 * @num: the number of entries in @sg writable by other side 2303 * @data: the token identifying the buffer. 2304 * @gfp: how to do memory allocations (if necessary). 2305 * 2306 * Caller must ensure we don't call this with other virtqueue operations 2307 * at the same time (except where noted). 2308 * 2309 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2310 */ 2311 int virtqueue_add_inbuf(struct virtqueue *vq, 2312 struct scatterlist *sg, unsigned int num, 2313 void *data, 2314 gfp_t gfp) 2315 { 2316 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp); 2317 } 2318 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 2319 2320 /** 2321 * virtqueue_add_inbuf_ctx - expose input buffers to other end 2322 * @vq: the struct virtqueue we're talking about. 2323 * @sg: scatterlist (must be well-formed and terminated!) 2324 * @num: the number of entries in @sg writable by other side 2325 * @data: the token identifying the buffer. 2326 * @ctx: extra context for the token 2327 * @gfp: how to do memory allocations (if necessary). 2328 * 2329 * Caller must ensure we don't call this with other virtqueue operations 2330 * at the same time (except where noted). 2331 * 2332 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
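 *
 * A minimal usage sketch (illustrative only; @buf, @buf_len and @my_ctx are
 * hypothetical caller state, not part of this API):
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, buf_len);
 *	if (!virtqueue_add_inbuf_ctx(vq, &sg, 1, buf, my_ctx, GFP_ATOMIC))
 *		virtqueue_kick(vq);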
2333 */ 2334 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 2335 struct scatterlist *sg, unsigned int num, 2336 void *data, 2337 void *ctx, 2338 gfp_t gfp) 2339 { 2340 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp); 2341 } 2342 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 2343 2344 /** 2345 * virtqueue_add_inbuf_premapped - expose input buffers to other end 2346 * @vq: the struct virtqueue we're talking about. 2347 * @sg: scatterlist (must be well-formed and terminated!) 2348 * @num: the number of entries in @sg writable by other side 2349 * @data: the token identifying the buffer. 2350 * @ctx: extra context for the token 2351 * @gfp: how to do memory allocations (if necessary). 2352 * 2353 * Caller must ensure we don't call this with other virtqueue operations 2354 * at the same time (except where noted). 2355 * 2356 * Return: 2357 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2358 */ 2359 int virtqueue_add_inbuf_premapped(struct virtqueue *vq, 2360 struct scatterlist *sg, unsigned int num, 2361 void *data, 2362 void *ctx, 2363 gfp_t gfp) 2364 { 2365 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp); 2366 } 2367 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped); 2368 2369 /** 2370 * virtqueue_dma_dev - get the dma dev 2371 * @_vq: the struct virtqueue we're talking about. 2372 * 2373 * Returns the dma dev. That can been used for dma api. 2374 */ 2375 struct device *virtqueue_dma_dev(struct virtqueue *_vq) 2376 { 2377 struct vring_virtqueue *vq = to_vvq(_vq); 2378 2379 if (vq->use_dma_api) 2380 return vring_dma_dev(vq); 2381 else 2382 return NULL; 2383 } 2384 EXPORT_SYMBOL_GPL(virtqueue_dma_dev); 2385 2386 /** 2387 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 2388 * @_vq: the struct virtqueue 2389 * 2390 * Instead of virtqueue_kick(), you can do: 2391 * if (virtqueue_kick_prepare(vq)) 2392 * virtqueue_notify(vq); 2393 * 2394 * This is sometimes useful because the virtqueue_kick_prepare() needs 2395 * to be serialized, but the actual virtqueue_notify() call does not. 2396 */ 2397 bool virtqueue_kick_prepare(struct virtqueue *_vq) 2398 { 2399 struct vring_virtqueue *vq = to_vvq(_vq); 2400 2401 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 2402 virtqueue_kick_prepare_split(_vq); 2403 } 2404 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 2405 2406 /** 2407 * virtqueue_notify - second half of split virtqueue_kick call. 2408 * @_vq: the struct virtqueue 2409 * 2410 * This does not need to be serialized. 2411 * 2412 * Returns false if host notify failed or queue is broken, otherwise true. 2413 */ 2414 bool virtqueue_notify(struct virtqueue *_vq) 2415 { 2416 struct vring_virtqueue *vq = to_vvq(_vq); 2417 2418 if (unlikely(vq->broken)) 2419 return false; 2420 2421 /* Prod other side to tell it about changes. */ 2422 if (!vq->notify(_vq)) { 2423 vq->broken = true; 2424 return false; 2425 } 2426 return true; 2427 } 2428 EXPORT_SYMBOL_GPL(virtqueue_notify); 2429 2430 /** 2431 * virtqueue_kick - update after add_buf 2432 * @vq: the struct virtqueue 2433 * 2434 * After one or more virtqueue_add_* calls, invoke this to kick 2435 * the other side. 2436 * 2437 * Caller must ensure we don't call this with other virtqueue 2438 * operations at the same time (except where noted). 2439 * 2440 * Returns false if kick failed, otherwise true. 
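 *
 * Illustrative sketch (hypothetical @sgs[]/@tokens[] caller state, not taken
 * from the original documentation): several buffers may be queued before a
 * single kick:
 *
 *	for (i = 0; i < n; i++)
 *		if (virtqueue_add_outbuf(vq, &sgs[i], 1, tokens[i], GFP_ATOMIC))
 *			break;
 *	virtqueue_kick(vq);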
2441 */ 2442 bool virtqueue_kick(struct virtqueue *vq) 2443 { 2444 if (virtqueue_kick_prepare(vq)) 2445 return virtqueue_notify(vq); 2446 return true; 2447 } 2448 EXPORT_SYMBOL_GPL(virtqueue_kick); 2449 2450 /** 2451 * virtqueue_get_buf_ctx - get the next used buffer 2452 * @_vq: the struct virtqueue we're talking about. 2453 * @len: the length written into the buffer 2454 * @ctx: extra context for the token 2455 * 2456 * If the device wrote data into the buffer, @len will be set to the 2457 * amount written. This means you don't need to clear the buffer 2458 * beforehand to ensure there's no data leakage in the case of short 2459 * writes. 2460 * 2461 * Caller must ensure we don't call this with other virtqueue 2462 * operations at the same time (except where noted). 2463 * 2464 * Returns NULL if there are no used buffers, or the "data" token 2465 * handed to virtqueue_add_*(). 2466 */ 2467 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 2468 void **ctx) 2469 { 2470 struct vring_virtqueue *vq = to_vvq(_vq); 2471 2472 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 2473 virtqueue_get_buf_ctx_split(_vq, len, ctx); 2474 } 2475 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2476 2477 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2478 { 2479 return virtqueue_get_buf_ctx(_vq, len, NULL); 2480 } 2481 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2482 /** 2483 * virtqueue_disable_cb - disable callbacks 2484 * @_vq: the struct virtqueue we're talking about. 2485 * 2486 * Note that this is not necessarily synchronous, hence unreliable and only 2487 * useful as an optimization. 2488 * 2489 * Unlike other operations, this need not be serialized. 2490 */ 2491 void virtqueue_disable_cb(struct virtqueue *_vq) 2492 { 2493 struct vring_virtqueue *vq = to_vvq(_vq); 2494 2495 if (vq->packed_ring) 2496 virtqueue_disable_cb_packed(_vq); 2497 else 2498 virtqueue_disable_cb_split(_vq); 2499 } 2500 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2501 2502 /** 2503 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2504 * @_vq: the struct virtqueue we're talking about. 2505 * 2506 * This re-enables callbacks; it returns current queue state 2507 * in an opaque unsigned value. This value should be later tested by 2508 * virtqueue_poll, to detect a possible race between the driver checking for 2509 * more work, and enabling callbacks. 2510 * 2511 * Caller must ensure we don't call this with other virtqueue 2512 * operations at the same time (except where noted). 2513 */ 2514 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2515 { 2516 struct vring_virtqueue *vq = to_vvq(_vq); 2517 2518 if (vq->event_triggered) 2519 vq->event_triggered = false; 2520 2521 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2522 virtqueue_enable_cb_prepare_split(_vq); 2523 } 2524 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2525 2526 /** 2527 * virtqueue_poll - query pending used buffers 2528 * @_vq: the struct virtqueue we're talking about. 2529 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2530 * 2531 * Returns "true" if there are pending used buffers in the queue. 2532 * 2533 * This does not need to be serialized. 2534 */ 2535 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2536 { 2537 struct vring_virtqueue *vq = to_vvq(_vq); 2538 2539 if (unlikely(vq->broken)) 2540 return false; 2541 2542 virtio_mb(vq->weak_barriers); 2543 return vq->packed_ring ? 
virtqueue_poll_packed(_vq, last_used_idx) : 2544 virtqueue_poll_split(_vq, last_used_idx); 2545 } 2546 EXPORT_SYMBOL_GPL(virtqueue_poll); 2547 2548 /** 2549 * virtqueue_enable_cb - restart callbacks after disable_cb. 2550 * @_vq: the struct virtqueue we're talking about. 2551 * 2552 * This re-enables callbacks; it returns "false" if there are pending 2553 * buffers in the queue, to detect a possible race between the driver 2554 * checking for more work, and enabling callbacks. 2555 * 2556 * Caller must ensure we don't call this with other virtqueue 2557 * operations at the same time (except where noted). 2558 */ 2559 bool virtqueue_enable_cb(struct virtqueue *_vq) 2560 { 2561 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2562 2563 return !virtqueue_poll(_vq, last_used_idx); 2564 } 2565 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2566 2567 /** 2568 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2569 * @_vq: the struct virtqueue we're talking about. 2570 * 2571 * This re-enables callbacks but hints to the other side to delay 2572 * interrupts until most of the available buffers have been processed; 2573 * it returns "false" if there are many pending buffers in the queue, 2574 * to detect a possible race between the driver checking for more work, 2575 * and enabling callbacks. 2576 * 2577 * Caller must ensure we don't call this with other virtqueue 2578 * operations at the same time (except where noted). 2579 */ 2580 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2581 { 2582 struct vring_virtqueue *vq = to_vvq(_vq); 2583 2584 if (vq->event_triggered) 2585 vq->event_triggered = false; 2586 2587 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2588 virtqueue_enable_cb_delayed_split(_vq); 2589 } 2590 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2591 2592 /** 2593 * virtqueue_detach_unused_buf - detach first unused buffer 2594 * @_vq: the struct virtqueue we're talking about. 2595 * 2596 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2597 * This is not valid on an active queue; it is useful for device 2598 * shutdown or the reset queue. 2599 */ 2600 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2601 { 2602 struct vring_virtqueue *vq = to_vvq(_vq); 2603 2604 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2605 virtqueue_detach_unused_buf_split(_vq); 2606 } 2607 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2608 2609 static inline bool more_used(const struct vring_virtqueue *vq) 2610 { 2611 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2612 } 2613 2614 /** 2615 * vring_interrupt - notify a virtqueue on an interrupt 2616 * @irq: the IRQ number (ignored) 2617 * @_vq: the struct virtqueue to notify 2618 * 2619 * Calls the callback function of @_vq to process the virtqueue 2620 * notification. 2621 */ 2622 irqreturn_t vring_interrupt(int irq, void *_vq) 2623 { 2624 struct vring_virtqueue *vq = to_vvq(_vq); 2625 2626 if (!more_used(vq)) { 2627 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2628 return IRQ_NONE; 2629 } 2630 2631 if (unlikely(vq->broken)) { 2632 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2633 dev_warn_once(&vq->vq.vdev->dev, 2634 "virtio vring IRQ raised before DRIVER_OK"); 2635 return IRQ_NONE; 2636 #else 2637 return IRQ_HANDLED; 2638 #endif 2639 } 2640 2641 /* Just a hint for performance: so it's ok that this can be racy! 
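	 * The store below is wrapped in data_race() so that KCSAN treats it
	 * as an intentional, benign race instead of reporting it.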
*/ 2642 if (vq->event) 2643 data_race(vq->event_triggered = true); 2644 2645 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2646 if (vq->vq.callback) 2647 vq->vq.callback(&vq->vq); 2648 2649 return IRQ_HANDLED; 2650 } 2651 EXPORT_SYMBOL_GPL(vring_interrupt); 2652 2653 /* Only available for split ring */ 2654 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 2655 struct vring_virtqueue_split *vring_split, 2656 struct virtio_device *vdev, 2657 bool weak_barriers, 2658 bool context, 2659 bool (*notify)(struct virtqueue *), 2660 void (*callback)(struct virtqueue *), 2661 const char *name, 2662 struct device *dma_dev) 2663 { 2664 struct vring_virtqueue *vq; 2665 int err; 2666 2667 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2668 return NULL; 2669 2670 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2671 if (!vq) 2672 return NULL; 2673 2674 vq->packed_ring = false; 2675 vq->vq.callback = callback; 2676 vq->vq.vdev = vdev; 2677 vq->vq.name = name; 2678 vq->vq.index = index; 2679 vq->vq.reset = false; 2680 vq->we_own_ring = false; 2681 vq->notify = notify; 2682 vq->weak_barriers = weak_barriers; 2683 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2684 vq->broken = true; 2685 #else 2686 vq->broken = false; 2687 #endif 2688 vq->dma_dev = dma_dev; 2689 vq->use_dma_api = vring_use_dma_api(vdev); 2690 2691 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2692 !context; 2693 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2694 2695 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2696 vq->weak_barriers = false; 2697 2698 err = vring_alloc_state_extra_split(vring_split); 2699 if (err) { 2700 kfree(vq); 2701 return NULL; 2702 } 2703 2704 virtqueue_vring_init_split(vring_split, vq); 2705 2706 virtqueue_init(vq, vring_split->vring.num); 2707 virtqueue_vring_attach_split(vq, vring_split); 2708 2709 spin_lock(&vdev->vqs_list_lock); 2710 list_add_tail(&vq->vq.list, &vdev->vqs); 2711 spin_unlock(&vdev->vqs_list_lock); 2712 return &vq->vq; 2713 } 2714 2715 struct virtqueue *vring_create_virtqueue( 2716 unsigned int index, 2717 unsigned int num, 2718 unsigned int vring_align, 2719 struct virtio_device *vdev, 2720 bool weak_barriers, 2721 bool may_reduce_num, 2722 bool context, 2723 bool (*notify)(struct virtqueue *), 2724 void (*callback)(struct virtqueue *), 2725 const char *name) 2726 { 2727 2728 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2729 return vring_create_virtqueue_packed(index, num, vring_align, 2730 vdev, weak_barriers, may_reduce_num, 2731 context, notify, callback, name, vdev->dev.parent); 2732 2733 return vring_create_virtqueue_split(index, num, vring_align, 2734 vdev, weak_barriers, may_reduce_num, 2735 context, notify, callback, name, vdev->dev.parent); 2736 } 2737 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2738 2739 struct virtqueue *vring_create_virtqueue_dma( 2740 unsigned int index, 2741 unsigned int num, 2742 unsigned int vring_align, 2743 struct virtio_device *vdev, 2744 bool weak_barriers, 2745 bool may_reduce_num, 2746 bool context, 2747 bool (*notify)(struct virtqueue *), 2748 void (*callback)(struct virtqueue *), 2749 const char *name, 2750 struct device *dma_dev) 2751 { 2752 2753 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2754 return vring_create_virtqueue_packed(index, num, vring_align, 2755 vdev, weak_barriers, may_reduce_num, 2756 context, notify, callback, name, dma_dev); 2757 2758 return vring_create_virtqueue_split(index, num, vring_align, 2759 vdev, weak_barriers, may_reduce_num, 2760 
context, notify, callback, name, dma_dev); 2761 } 2762 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); 2763 2764 /** 2765 * virtqueue_resize - resize the vring of vq 2766 * @_vq: the struct virtqueue we're talking about. 2767 * @num: new ring num 2768 * @recycle: callback to recycle unused buffers 2769 * 2770 * When it is really necessary to create a new vring, it will set the current vq 2771 * into the reset state. Then call the passed callback to recycle the buffer 2772 * that is no longer used. Only after the new vring is successfully created, the 2773 * old vring will be released. 2774 * 2775 * Caller must ensure we don't call this with other virtqueue operations 2776 * at the same time (except where noted). 2777 * 2778 * Returns zero or a negative error. 2779 * 0: success. 2780 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. 2781 * vq can still work normally 2782 * -EBUSY: Failed to sync with device, vq may not work properly 2783 * -ENOENT: Transport or device not supported 2784 * -E2BIG/-EINVAL: num error 2785 * -EPERM: Operation not permitted 2786 * 2787 */ 2788 int virtqueue_resize(struct virtqueue *_vq, u32 num, 2789 void (*recycle)(struct virtqueue *vq, void *buf)) 2790 { 2791 struct vring_virtqueue *vq = to_vvq(_vq); 2792 int err; 2793 2794 if (num > vq->vq.num_max) 2795 return -E2BIG; 2796 2797 if (!num) 2798 return -EINVAL; 2799 2800 if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) 2801 return 0; 2802 2803 err = virtqueue_disable_and_recycle(_vq, recycle); 2804 if (err) 2805 return err; 2806 2807 if (vq->packed_ring) 2808 err = virtqueue_resize_packed(_vq, num); 2809 else 2810 err = virtqueue_resize_split(_vq, num); 2811 2812 return virtqueue_enable_after_reset(_vq); 2813 } 2814 EXPORT_SYMBOL_GPL(virtqueue_resize); 2815 2816 /** 2817 * virtqueue_reset - detach and recycle all unused buffers 2818 * @_vq: the struct virtqueue we're talking about. 2819 * @recycle: callback to recycle unused buffers 2820 * 2821 * Caller must ensure we don't call this with other virtqueue operations 2822 * at the same time (except where noted). 2823 * 2824 * Returns zero or a negative error. 2825 * 0: success. 
2826 * -EBUSY: Failed to sync with device, vq may not work properly 2827 * -ENOENT: Transport or device not supported 2828 * -EPERM: Operation not permitted 2829 */ 2830 int virtqueue_reset(struct virtqueue *_vq, 2831 void (*recycle)(struct virtqueue *vq, void *buf)) 2832 { 2833 struct vring_virtqueue *vq = to_vvq(_vq); 2834 int err; 2835 2836 err = virtqueue_disable_and_recycle(_vq, recycle); 2837 if (err) 2838 return err; 2839 2840 if (vq->packed_ring) 2841 virtqueue_reinit_packed(vq); 2842 else 2843 virtqueue_reinit_split(vq); 2844 2845 return virtqueue_enable_after_reset(_vq); 2846 } 2847 EXPORT_SYMBOL_GPL(virtqueue_reset); 2848 2849 /* Only available for split ring */ 2850 struct virtqueue *vring_new_virtqueue(unsigned int index, 2851 unsigned int num, 2852 unsigned int vring_align, 2853 struct virtio_device *vdev, 2854 bool weak_barriers, 2855 bool context, 2856 void *pages, 2857 bool (*notify)(struct virtqueue *vq), 2858 void (*callback)(struct virtqueue *vq), 2859 const char *name) 2860 { 2861 struct vring_virtqueue_split vring_split = {}; 2862 2863 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2864 return NULL; 2865 2866 vring_init(&vring_split.vring, num, pages, vring_align); 2867 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 2868 context, notify, callback, name, 2869 vdev->dev.parent); 2870 } 2871 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2872 2873 static void vring_free(struct virtqueue *_vq) 2874 { 2875 struct vring_virtqueue *vq = to_vvq(_vq); 2876 2877 if (vq->we_own_ring) { 2878 if (vq->packed_ring) { 2879 vring_free_queue(vq->vq.vdev, 2880 vq->packed.ring_size_in_bytes, 2881 vq->packed.vring.desc, 2882 vq->packed.ring_dma_addr, 2883 vring_dma_dev(vq)); 2884 2885 vring_free_queue(vq->vq.vdev, 2886 vq->packed.event_size_in_bytes, 2887 vq->packed.vring.driver, 2888 vq->packed.driver_event_dma_addr, 2889 vring_dma_dev(vq)); 2890 2891 vring_free_queue(vq->vq.vdev, 2892 vq->packed.event_size_in_bytes, 2893 vq->packed.vring.device, 2894 vq->packed.device_event_dma_addr, 2895 vring_dma_dev(vq)); 2896 2897 kfree(vq->packed.desc_state); 2898 kfree(vq->packed.desc_extra); 2899 } else { 2900 vring_free_queue(vq->vq.vdev, 2901 vq->split.queue_size_in_bytes, 2902 vq->split.vring.desc, 2903 vq->split.queue_dma_addr, 2904 vring_dma_dev(vq)); 2905 } 2906 } 2907 if (!vq->packed_ring) { 2908 kfree(vq->split.desc_state); 2909 kfree(vq->split.desc_extra); 2910 } 2911 } 2912 2913 void vring_del_virtqueue(struct virtqueue *_vq) 2914 { 2915 struct vring_virtqueue *vq = to_vvq(_vq); 2916 2917 spin_lock(&vq->vq.vdev->vqs_list_lock); 2918 list_del(&_vq->list); 2919 spin_unlock(&vq->vq.vdev->vqs_list_lock); 2920 2921 vring_free(_vq); 2922 2923 kfree(vq); 2924 } 2925 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2926 2927 u32 vring_notification_data(struct virtqueue *_vq) 2928 { 2929 struct vring_virtqueue *vq = to_vvq(_vq); 2930 u16 next; 2931 2932 if (vq->packed_ring) 2933 next = (vq->packed.next_avail_idx & 2934 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | 2935 vq->packed.avail_wrap_counter << 2936 VRING_PACKED_EVENT_F_WRAP_CTR; 2937 else 2938 next = vq->split.avail_idx_shadow; 2939 2940 return next << 16 | _vq->index; 2941 } 2942 EXPORT_SYMBOL_GPL(vring_notification_data); 2943 2944 /* Manipulates transport-specific feature bits. 
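 * Every transport feature bit that this ring implementation does not
 * recognize is cleared, so the driver never acknowledges a feature it
 * cannot support.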
*/ 2945 void vring_transport_features(struct virtio_device *vdev) 2946 { 2947 unsigned int i; 2948 2949 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2950 switch (i) { 2951 case VIRTIO_RING_F_INDIRECT_DESC: 2952 break; 2953 case VIRTIO_RING_F_EVENT_IDX: 2954 break; 2955 case VIRTIO_F_VERSION_1: 2956 break; 2957 case VIRTIO_F_ACCESS_PLATFORM: 2958 break; 2959 case VIRTIO_F_RING_PACKED: 2960 break; 2961 case VIRTIO_F_ORDER_PLATFORM: 2962 break; 2963 case VIRTIO_F_NOTIFICATION_DATA: 2964 break; 2965 default: 2966 /* We don't understand this bit. */ 2967 __virtio_clear_bit(vdev, i); 2968 } 2969 } 2970 } 2971 EXPORT_SYMBOL_GPL(vring_transport_features); 2972 2973 /** 2974 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2975 * @_vq: the struct virtqueue containing the vring of interest. 2976 * 2977 * Returns the size of the vring. This is mainly used for boasting to 2978 * userspace. Unlike other operations, this need not be serialized. 2979 */ 2980 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) 2981 { 2982 2983 const struct vring_virtqueue *vq = to_vvq(_vq); 2984 2985 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2986 } 2987 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2988 2989 /* 2990 * This function should only be called by the core, not directly by the driver. 2991 */ 2992 void __virtqueue_break(struct virtqueue *_vq) 2993 { 2994 struct vring_virtqueue *vq = to_vvq(_vq); 2995 2996 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2997 WRITE_ONCE(vq->broken, true); 2998 } 2999 EXPORT_SYMBOL_GPL(__virtqueue_break); 3000 3001 /* 3002 * This function should only be called by the core, not directly by the driver. 3003 */ 3004 void __virtqueue_unbreak(struct virtqueue *_vq) 3005 { 3006 struct vring_virtqueue *vq = to_vvq(_vq); 3007 3008 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3009 WRITE_ONCE(vq->broken, false); 3010 } 3011 EXPORT_SYMBOL_GPL(__virtqueue_unbreak); 3012 3013 bool virtqueue_is_broken(const struct virtqueue *_vq) 3014 { 3015 const struct vring_virtqueue *vq = to_vvq(_vq); 3016 3017 return READ_ONCE(vq->broken); 3018 } 3019 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 3020 3021 /* 3022 * This should prevent the device from being used, allowing drivers to 3023 * recover. You may need to grab appropriate locks to flush. 3024 */ 3025 void virtio_break_device(struct virtio_device *dev) 3026 { 3027 struct virtqueue *_vq; 3028 3029 spin_lock(&dev->vqs_list_lock); 3030 list_for_each_entry(_vq, &dev->vqs, list) { 3031 struct vring_virtqueue *vq = to_vvq(_vq); 3032 3033 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3034 WRITE_ONCE(vq->broken, true); 3035 } 3036 spin_unlock(&dev->vqs_list_lock); 3037 } 3038 EXPORT_SYMBOL_GPL(virtio_break_device); 3039 3040 /* 3041 * This should allow the device to be used by the driver. You may 3042 * need to grab appropriate locks to flush the write to 3043 * vq->broken. This should only be used in some specific case e.g 3044 * (probing and restoring). This function should only be called by the 3045 * core, not directly by the driver. 3046 */ 3047 void __virtio_unbreak_device(struct virtio_device *dev) 3048 { 3049 struct virtqueue *_vq; 3050 3051 spin_lock(&dev->vqs_list_lock); 3052 list_for_each_entry(_vq, &dev->vqs, list) { 3053 struct vring_virtqueue *vq = to_vvq(_vq); 3054 3055 /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ 3056 WRITE_ONCE(vq->broken, false); 3057 } 3058 spin_unlock(&dev->vqs_list_lock); 3059 } 3060 EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 3061 3062 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) 3063 { 3064 const struct vring_virtqueue *vq = to_vvq(_vq); 3065 3066 BUG_ON(!vq->we_own_ring); 3067 3068 if (vq->packed_ring) 3069 return vq->packed.ring_dma_addr; 3070 3071 return vq->split.queue_dma_addr; 3072 } 3073 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 3074 3075 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) 3076 { 3077 const struct vring_virtqueue *vq = to_vvq(_vq); 3078 3079 BUG_ON(!vq->we_own_ring); 3080 3081 if (vq->packed_ring) 3082 return vq->packed.driver_event_dma_addr; 3083 3084 return vq->split.queue_dma_addr + 3085 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 3086 } 3087 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 3088 3089 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) 3090 { 3091 const struct vring_virtqueue *vq = to_vvq(_vq); 3092 3093 BUG_ON(!vq->we_own_ring); 3094 3095 if (vq->packed_ring) 3096 return vq->packed.device_event_dma_addr; 3097 3098 return vq->split.queue_dma_addr + 3099 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 3100 } 3101 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 3102 3103 /* Only available for split ring */ 3104 const struct vring *virtqueue_get_vring(const struct virtqueue *vq) 3105 { 3106 return &to_vvq(vq)->split.vring; 3107 } 3108 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 3109 3110 /** 3111 * virtqueue_dma_map_single_attrs - map DMA for _vq 3112 * @_vq: the struct virtqueue we're talking about. 3113 * @ptr: the pointer of the buffer to do dma 3114 * @size: the size of the buffer to do dma 3115 * @dir: DMA direction 3116 * @attrs: DMA Attrs 3117 * 3118 * The caller calls this to do dma mapping in advance. The DMA address can be 3119 * passed to this _vq when it is in pre-mapped mode. 3120 * 3121 * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). 3122 */ 3123 dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, 3124 size_t size, 3125 enum dma_data_direction dir, 3126 unsigned long attrs) 3127 { 3128 struct vring_virtqueue *vq = to_vvq(_vq); 3129 3130 if (!vq->use_dma_api) { 3131 kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir); 3132 return (dma_addr_t)virt_to_phys(ptr); 3133 } 3134 3135 return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); 3136 } 3137 EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); 3138 3139 /** 3140 * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq 3141 * @_vq: the struct virtqueue we're talking about. 3142 * @addr: the dma address to unmap 3143 * @size: the size of the buffer 3144 * @dir: DMA direction 3145 * @attrs: DMA Attrs 3146 * 3147 * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. 3148 * 3149 */ 3150 void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, 3151 size_t size, enum dma_data_direction dir, 3152 unsigned long attrs) 3153 { 3154 struct vring_virtqueue *vq = to_vvq(_vq); 3155 3156 if (!vq->use_dma_api) 3157 return; 3158 3159 dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs); 3160 } 3161 EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); 3162 3163 /** 3164 * virtqueue_dma_mapping_error - check dma address 3165 * @_vq: the struct virtqueue we're talking about. 3166 * @addr: DMA address 3167 * 3168 * Returns 0 means dma valid. Other means invalid dma address. 
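 *
 * Illustrative sketch (hypothetical @buf/@len, not part of the original
 * documentation) for checking a premapped buffer:
 *
 *	addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
 *	if (virtqueue_dma_mapping_error(vq, addr))
 *		return -ENOMEM;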
3169 */ 3170 int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) 3171 { 3172 struct vring_virtqueue *vq = to_vvq(_vq); 3173 3174 if (!vq->use_dma_api) 3175 return 0; 3176 3177 return dma_mapping_error(vring_dma_dev(vq), addr); 3178 } 3179 EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); 3180 3181 /** 3182 * virtqueue_dma_need_sync - check a dma address needs sync 3183 * @_vq: the struct virtqueue we're talking about. 3184 * @addr: DMA address 3185 * 3186 * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be 3187 * synchronized 3188 * 3189 * return bool 3190 */ 3191 bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) 3192 { 3193 struct vring_virtqueue *vq = to_vvq(_vq); 3194 3195 if (!vq->use_dma_api) 3196 return false; 3197 3198 return dma_need_sync(vring_dma_dev(vq), addr); 3199 } 3200 EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); 3201 3202 /** 3203 * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu 3204 * @_vq: the struct virtqueue we're talking about. 3205 * @addr: DMA address 3206 * @offset: DMA address offset 3207 * @size: buf size for sync 3208 * @dir: DMA direction 3209 * 3210 * Before calling this function, use virtqueue_dma_need_sync() to confirm that 3211 * the DMA address really needs to be synchronized 3212 * 3213 */ 3214 void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, 3215 dma_addr_t addr, 3216 unsigned long offset, size_t size, 3217 enum dma_data_direction dir) 3218 { 3219 struct vring_virtqueue *vq = to_vvq(_vq); 3220 struct device *dev = vring_dma_dev(vq); 3221 3222 if (!vq->use_dma_api) 3223 return; 3224 3225 dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); 3226 } 3227 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); 3228 3229 /** 3230 * virtqueue_dma_sync_single_range_for_device - dma sync for device 3231 * @_vq: the struct virtqueue we're talking about. 3232 * @addr: DMA address 3233 * @offset: DMA address offset 3234 * @size: buf size for sync 3235 * @dir: DMA direction 3236 * 3237 * Before calling this function, use virtqueue_dma_need_sync() to confirm that 3238 * the DMA address really needs to be synchronized 3239 */ 3240 void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, 3241 dma_addr_t addr, 3242 unsigned long offset, size_t size, 3243 enum dma_data_direction dir) 3244 { 3245 struct vring_virtqueue *vq = to_vvq(_vq); 3246 struct device *dev = vring_dma_dev(vq); 3247 3248 if (!vq->use_dma_api) 3249 return; 3250 3251 dma_sync_single_range_for_device(dev, addr, offset, size, dir); 3252 } 3253 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); 3254 3255 MODULE_DESCRIPTION("Virtio ring implementation"); 3256 MODULE_LICENSE("GPL"); 3257