1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/kmsan.h> 15 #include <linux/spinlock.h> 16 #include <xen/xen.h> 17 18 #ifdef DEBUG 19 /* For development, we want to crash whenever the ring is screwed. */ 20 #define BAD_RING(_vq, fmt, args...) \ 21 do { \ 22 dev_err(&(_vq)->vq.vdev->dev, \ 23 "%s:"fmt, (_vq)->vq.name, ##args); \ 24 BUG(); \ 25 } while (0) 26 /* Caller is supposed to guarantee no reentry. */ 27 #define START_USE(_vq) \ 28 do { \ 29 if ((_vq)->in_use) \ 30 panic("%s:in_use = %i\n", \ 31 (_vq)->vq.name, (_vq)->in_use); \ 32 (_vq)->in_use = __LINE__; \ 33 } while (0) 34 #define END_USE(_vq) \ 35 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 36 #define LAST_ADD_TIME_UPDATE(_vq) \ 37 do { \ 38 ktime_t now = ktime_get(); \ 39 \ 40 /* No kick or get, with .1 second between? Warn. */ \ 41 if ((_vq)->last_add_time_valid) \ 42 WARN_ON(ktime_to_ms(ktime_sub(now, \ 43 (_vq)->last_add_time)) > 100); \ 44 (_vq)->last_add_time = now; \ 45 (_vq)->last_add_time_valid = true; \ 46 } while (0) 47 #define LAST_ADD_TIME_CHECK(_vq) \ 48 do { \ 49 if ((_vq)->last_add_time_valid) { \ 50 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 51 (_vq)->last_add_time)) > 100); \ 52 } \ 53 } while (0) 54 #define LAST_ADD_TIME_INVALID(_vq) \ 55 ((_vq)->last_add_time_valid = false) 56 #else 57 #define BAD_RING(_vq, fmt, args...) \ 58 do { \ 59 dev_err(&_vq->vq.vdev->dev, \ 60 "%s:"fmt, (_vq)->vq.name, ##args); \ 61 (_vq)->broken = true; \ 62 } while (0) 63 #define START_USE(vq) 64 #define END_USE(vq) 65 #define LAST_ADD_TIME_UPDATE(vq) 66 #define LAST_ADD_TIME_CHECK(vq) 67 #define LAST_ADD_TIME_INVALID(vq) 68 #endif 69 70 struct vring_desc_state_split { 71 void *data; /* Data for callback. */ 72 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 73 }; 74 75 struct vring_desc_state_packed { 76 void *data; /* Data for callback. */ 77 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 78 u16 num; /* Descriptor list length. */ 79 u16 last; /* The last desc state in a list. */ 80 }; 81 82 struct vring_desc_extra { 83 dma_addr_t addr; /* Descriptor DMA addr. */ 84 u32 len; /* Descriptor length. */ 85 u16 flags; /* Descriptor flags. */ 86 u16 next; /* The next desc state in a list. */ 87 }; 88 89 struct vring_virtqueue_split { 90 /* Actual memory layout for this queue. */ 91 struct vring vring; 92 93 /* Last written value to avail->flags */ 94 u16 avail_flags_shadow; 95 96 /* 97 * Last written value to avail->idx in 98 * guest byte order. 99 */ 100 u16 avail_idx_shadow; 101 102 /* Per-descriptor state. */ 103 struct vring_desc_state_split *desc_state; 104 struct vring_desc_extra *desc_extra; 105 106 /* DMA address and size information */ 107 dma_addr_t queue_dma_addr; 108 size_t queue_size_in_bytes; 109 110 /* 111 * The parameters for creating vrings are reserved for creating new 112 * vring. 113 */ 114 u32 vring_align; 115 bool may_reduce_num; 116 }; 117 118 struct vring_virtqueue_packed { 119 /* Actual memory layout for this queue. 
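 *
 * For a queue of num entries this works out to num * 16 bytes of
 * descriptors (struct vring_packed_desc is 16 bytes) plus two 4-byte
 * struct vring_packed_desc_event areas for driver and device event
 * suppression, e.g. 256 * 16 = 4096 bytes of descriptors for a
 * 256-entry ring; all three areas are allocated separately in
 * vring_alloc_queue_packed().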
*/ 120 struct { 121 unsigned int num; 122 struct vring_packed_desc *desc; 123 struct vring_packed_desc_event *driver; 124 struct vring_packed_desc_event *device; 125 } vring; 126 127 /* Driver ring wrap counter. */ 128 bool avail_wrap_counter; 129 130 /* Avail used flags. */ 131 u16 avail_used_flags; 132 133 /* Index of the next avail descriptor. */ 134 u16 next_avail_idx; 135 136 /* 137 * Last written value to driver->flags in 138 * guest byte order. 139 */ 140 u16 event_flags_shadow; 141 142 /* Per-descriptor state. */ 143 struct vring_desc_state_packed *desc_state; 144 struct vring_desc_extra *desc_extra; 145 146 /* DMA address and size information */ 147 dma_addr_t ring_dma_addr; 148 dma_addr_t driver_event_dma_addr; 149 dma_addr_t device_event_dma_addr; 150 size_t ring_size_in_bytes; 151 size_t event_size_in_bytes; 152 }; 153 154 struct vring_virtqueue { 155 struct virtqueue vq; 156 157 /* Is this a packed ring? */ 158 bool packed_ring; 159 160 /* Is DMA API used? */ 161 bool use_dma_api; 162 163 /* Can we use weak barriers? */ 164 bool weak_barriers; 165 166 /* Other side has made a mess, don't try any more. */ 167 bool broken; 168 169 /* Host supports indirect buffers */ 170 bool indirect; 171 172 /* Host publishes avail event idx */ 173 bool event; 174 175 /* Do DMA mapping by driver */ 176 bool premapped; 177 178 /* Do unmap or not for desc. Just when premapped is False and 179 * use_dma_api is true, this is true. 180 */ 181 bool do_unmap; 182 183 /* Head of free buffer list. */ 184 unsigned int free_head; 185 /* Number we've added since last sync. */ 186 unsigned int num_added; 187 188 /* Last used index we've seen. 189 * for split ring, it just contains last used index 190 * for packed ring: 191 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. 192 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. 193 */ 194 u16 last_used_idx; 195 196 /* Hint for event idx: already triggered no need to disable. */ 197 bool event_triggered; 198 199 union { 200 /* Available for split ring */ 201 struct vring_virtqueue_split split; 202 203 /* Available for packed ring */ 204 struct vring_virtqueue_packed packed; 205 }; 206 207 /* How to notify other side. FIXME: commonalize hcalls! */ 208 bool (*notify)(struct virtqueue *vq); 209 210 /* DMA, allocation, and size information */ 211 bool we_own_ring; 212 213 /* Device used for doing DMA */ 214 struct device *dma_dev; 215 216 #ifdef DEBUG 217 /* They're supposed to lock for us. */ 218 unsigned int in_use; 219 220 /* Figure out if their kicks are too delayed. */ 221 bool last_add_time_valid; 222 ktime_t last_add_time; 223 #endif 224 }; 225 226 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 227 struct vring_virtqueue_split *vring_split, 228 struct virtio_device *vdev, 229 bool weak_barriers, 230 bool context, 231 bool (*notify)(struct virtqueue *), 232 void (*callback)(struct virtqueue *), 233 const char *name, 234 struct device *dma_dev); 235 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); 236 static void vring_free(struct virtqueue *_vq); 237 238 /* 239 * Helpers. 240 */ 241 242 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) 243 244 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, 245 unsigned int total_sg) 246 { 247 /* 248 * If the host supports indirect descriptor tables, and we have multiple 249 * buffers, then go indirect. 
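 *
 * Caller-side sketch (hypothetical buffers and token, not part of this
 * file): with VIRTIO_RING_F_INDIRECT_DESC negotiated, a three-entry
 * request like the one below consumes a single ring slot instead of
 * three.
 *
 *	struct scatterlist sg[3];
 *
 *	sg_init_table(sg, 3);
 *	sg_set_buf(&sg[0], hdr, sizeof(*hdr));
 *	sg_set_buf(&sg[1], payload, payload_len);
 *	sg_set_buf(&sg[2], footer, sizeof(*footer));
 *	virtqueue_add_outbuf(vq, sg, 3, token, GFP_ATOMIC);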
FIXME: tune this threshold 250 */ 251 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 252 } 253 254 /* 255 * Modern virtio devices have feature bits to specify whether they need a 256 * quirk and bypass the IOMMU. If not there, just use the DMA API. 257 * 258 * If there, the interaction between virtio and DMA API is messy. 259 * 260 * On most systems with virtio, physical addresses match bus addresses, 261 * and it doesn't particularly matter whether we use the DMA API. 262 * 263 * On some systems, including Xen and any system with a physical device 264 * that speaks virtio behind a physical IOMMU, we must use the DMA API 265 * for virtio DMA to work at all. 266 * 267 * On other systems, including SPARC and PPC64, virtio-pci devices are 268 * enumerated as though they are behind an IOMMU, but the virtio host 269 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 270 * there or somehow map everything as the identity. 271 * 272 * For the time being, we preserve historic behavior and bypass the DMA 273 * API. 274 * 275 * TODO: install a per-device DMA ops structure that does the right thing 276 * taking into account all the above quirks, and use the DMA API 277 * unconditionally on data path. 278 */ 279 280 static bool vring_use_dma_api(const struct virtio_device *vdev) 281 { 282 if (!virtio_has_dma_quirk(vdev)) 283 return true; 284 285 /* Otherwise, we are left to guess. */ 286 /* 287 * In theory, it's possible to have a buggy QEMU-supplied 288 * emulated Q35 IOMMU and Xen enabled at the same time. On 289 * such a configuration, virtio has never worked and will 290 * not work without an even larger kludge. Instead, enable 291 * the DMA API if we're a Xen guest, which at least allows 292 * all of the sensible Xen configurations to work correctly. 293 */ 294 if (xen_domain()) 295 return true; 296 297 return false; 298 } 299 300 size_t virtio_max_dma_size(const struct virtio_device *vdev) 301 { 302 size_t max_segment_size = SIZE_MAX; 303 304 if (vring_use_dma_api(vdev)) 305 max_segment_size = dma_max_mapping_size(vdev->dev.parent); 306 307 return max_segment_size; 308 } 309 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 310 311 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 312 dma_addr_t *dma_handle, gfp_t flag, 313 struct device *dma_dev) 314 { 315 if (vring_use_dma_api(vdev)) { 316 return dma_alloc_coherent(dma_dev, size, 317 dma_handle, flag); 318 } else { 319 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 320 321 if (queue) { 322 phys_addr_t phys_addr = virt_to_phys(queue); 323 *dma_handle = (dma_addr_t)phys_addr; 324 325 /* 326 * Sanity check: make sure we didn't truncate 327 * the address. The only arches I can find that 328 * have 64-bit phys_addr_t but 32-bit dma_addr_t 329 * are certain non-highmem MIPS and x86 330 * configurations, but these configurations 331 * should never allocate physical pages above 32 332 * bits, so this is fine. Just in case, throw a 333 * warning and abort if we end up with an 334 * unrepresentable address.
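 *
 * For instance, on a hypothetical configuration with a 64-bit
 * phys_addr_t and a 32-bit dma_addr_t, an allocation landing at
 * physical address 0x100000000 would read back as 0x0 after the cast
 * above; the WARN fires and the pages are freed rather than handed to
 * the device with a bogus address.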
335 */ 336 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 337 free_pages_exact(queue, PAGE_ALIGN(size)); 338 return NULL; 339 } 340 } 341 return queue; 342 } 343 } 344 345 static void vring_free_queue(struct virtio_device *vdev, size_t size, 346 void *queue, dma_addr_t dma_handle, 347 struct device *dma_dev) 348 { 349 if (vring_use_dma_api(vdev)) 350 dma_free_coherent(dma_dev, size, queue, dma_handle); 351 else 352 free_pages_exact(queue, PAGE_ALIGN(size)); 353 } 354 355 /* 356 * The DMA ops on various arches are rather gnarly right now, and 357 * making all of the arch DMA ops work on the vring device itself 358 * is a mess. 359 */ 360 static struct device *vring_dma_dev(const struct vring_virtqueue *vq) 361 { 362 return vq->dma_dev; 363 } 364 365 /* Map one sg entry. */ 366 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, 367 enum dma_data_direction direction, dma_addr_t *addr) 368 { 369 if (vq->premapped) { 370 *addr = sg_dma_address(sg); 371 return 0; 372 } 373 374 if (!vq->use_dma_api) { 375 /* 376 * If DMA is not used, KMSAN doesn't know that the scatterlist 377 * is initialized by the hardware. Explicitly check/unpoison it 378 * depending on the direction. 379 */ 380 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); 381 *addr = (dma_addr_t)sg_phys(sg); 382 return 0; 383 } 384 385 /* 386 * We can't use dma_map_sg, because we don't use scatterlists in 387 * the way it expects (we don't guarantee that the scatterlist 388 * will exist for the lifetime of the mapping). 389 */ 390 *addr = dma_map_page(vring_dma_dev(vq), 391 sg_page(sg), sg->offset, sg->length, 392 direction); 393 394 if (dma_mapping_error(vring_dma_dev(vq), *addr)) 395 return -ENOMEM; 396 397 return 0; 398 } 399 400 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 401 void *cpu_addr, size_t size, 402 enum dma_data_direction direction) 403 { 404 if (!vq->use_dma_api) 405 return (dma_addr_t)virt_to_phys(cpu_addr); 406 407 return dma_map_single(vring_dma_dev(vq), 408 cpu_addr, size, direction); 409 } 410 411 static int vring_mapping_error(const struct vring_virtqueue *vq, 412 dma_addr_t addr) 413 { 414 if (!vq->use_dma_api) 415 return 0; 416 417 return dma_mapping_error(vring_dma_dev(vq), addr); 418 } 419 420 static void virtqueue_init(struct vring_virtqueue *vq, u32 num) 421 { 422 vq->vq.num_free = num; 423 424 if (vq->packed_ring) 425 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 426 else 427 vq->last_used_idx = 0; 428 429 vq->event_triggered = false; 430 vq->num_added = 0; 431 432 #ifdef DEBUG 433 vq->in_use = false; 434 vq->last_add_time_valid = false; 435 #endif 436 } 437 438 439 /* 440 * Split ring specific functions - *_split(). 441 */ 442 443 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 444 const struct vring_desc *desc) 445 { 446 u16 flags; 447 448 if (!vq->do_unmap) 449 return; 450 451 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 452 453 dma_unmap_page(vring_dma_dev(vq), 454 virtio64_to_cpu(vq->vq.vdev, desc->addr), 455 virtio32_to_cpu(vq->vq.vdev, desc->len), 456 (flags & VRING_DESC_F_WRITE) ? 
457 DMA_FROM_DEVICE : DMA_TO_DEVICE); 458 } 459 460 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 461 unsigned int i) 462 { 463 struct vring_desc_extra *extra = vq->split.desc_extra; 464 u16 flags; 465 466 flags = extra[i].flags; 467 468 if (flags & VRING_DESC_F_INDIRECT) { 469 if (!vq->use_dma_api) 470 goto out; 471 472 dma_unmap_single(vring_dma_dev(vq), 473 extra[i].addr, 474 extra[i].len, 475 (flags & VRING_DESC_F_WRITE) ? 476 DMA_FROM_DEVICE : DMA_TO_DEVICE); 477 } else { 478 if (!vq->do_unmap) 479 goto out; 480 481 dma_unmap_page(vring_dma_dev(vq), 482 extra[i].addr, 483 extra[i].len, 484 (flags & VRING_DESC_F_WRITE) ? 485 DMA_FROM_DEVICE : DMA_TO_DEVICE); 486 } 487 488 out: 489 return extra[i].next; 490 } 491 492 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 493 unsigned int total_sg, 494 gfp_t gfp) 495 { 496 struct vring_desc *desc; 497 unsigned int i; 498 499 /* 500 * We require lowmem mappings for the descriptors because 501 * otherwise virt_to_phys will give us bogus addresses in the 502 * virtqueue. 503 */ 504 gfp &= ~__GFP_HIGHMEM; 505 506 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 507 if (!desc) 508 return NULL; 509 510 for (i = 0; i < total_sg; i++) 511 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 512 return desc; 513 } 514 515 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 516 struct vring_desc *desc, 517 unsigned int i, 518 dma_addr_t addr, 519 unsigned int len, 520 u16 flags, 521 bool indirect) 522 { 523 struct vring_virtqueue *vring = to_vvq(vq); 524 struct vring_desc_extra *extra = vring->split.desc_extra; 525 u16 next; 526 527 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 528 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 529 desc[i].len = cpu_to_virtio32(vq->vdev, len); 530 531 if (!indirect) { 532 next = extra[i].next; 533 desc[i].next = cpu_to_virtio16(vq->vdev, next); 534 535 extra[i].addr = addr; 536 extra[i].len = len; 537 extra[i].flags = flags; 538 } else 539 next = virtio16_to_cpu(vq->vdev, desc[i].next); 540 541 return next; 542 } 543 544 static inline int virtqueue_add_split(struct virtqueue *_vq, 545 struct scatterlist *sgs[], 546 unsigned int total_sg, 547 unsigned int out_sgs, 548 unsigned int in_sgs, 549 void *data, 550 void *ctx, 551 gfp_t gfp) 552 { 553 struct vring_virtqueue *vq = to_vvq(_vq); 554 struct scatterlist *sg; 555 struct vring_desc *desc; 556 unsigned int i, n, avail, descs_used, prev, err_idx; 557 int head; 558 bool indirect; 559 560 START_USE(vq); 561 562 BUG_ON(data == NULL); 563 BUG_ON(ctx && vq->indirect); 564 565 if (unlikely(vq->broken)) { 566 END_USE(vq); 567 return -EIO; 568 } 569 570 LAST_ADD_TIME_UPDATE(vq); 571 572 BUG_ON(total_sg == 0); 573 574 head = vq->free_head; 575 576 if (virtqueue_use_indirect(vq, total_sg)) 577 desc = alloc_indirect_split(_vq, total_sg, gfp); 578 else { 579 desc = NULL; 580 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 581 } 582 583 if (desc) { 584 /* Use a single buffer which doesn't continue */ 585 indirect = true; 586 /* Set up rest to use this indirect table. */ 587 i = 0; 588 descs_used = 1; 589 } else { 590 indirect = false; 591 desc = vq->split.vring.desc; 592 i = head; 593 descs_used = total_sg; 594 } 595 596 if (unlikely(vq->vq.num_free < descs_used)) { 597 pr_debug("Can't add buf len %i - avail = %i\n", 598 descs_used, vq->vq.num_free); 599 /* FIXME: for historical reasons, we force a notify here if 600 * there are outgoing parts to the buffer. 
Presumably the 601 * host should service the ring ASAP. */ 602 if (out_sgs) 603 vq->notify(&vq->vq); 604 if (indirect) 605 kfree(desc); 606 END_USE(vq); 607 return -ENOSPC; 608 } 609 610 for (n = 0; n < out_sgs; n++) { 611 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 612 dma_addr_t addr; 613 614 if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) 615 goto unmap_release; 616 617 prev = i; 618 /* Note that we trust indirect descriptor 619 * table since it use stream DMA mapping. 620 */ 621 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 622 VRING_DESC_F_NEXT, 623 indirect); 624 } 625 } 626 for (; n < (out_sgs + in_sgs); n++) { 627 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 628 dma_addr_t addr; 629 630 if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) 631 goto unmap_release; 632 633 prev = i; 634 /* Note that we trust indirect descriptor 635 * table since it use stream DMA mapping. 636 */ 637 i = virtqueue_add_desc_split(_vq, desc, i, addr, 638 sg->length, 639 VRING_DESC_F_NEXT | 640 VRING_DESC_F_WRITE, 641 indirect); 642 } 643 } 644 /* Last one doesn't continue. */ 645 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 646 if (!indirect && vq->do_unmap) 647 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 648 ~VRING_DESC_F_NEXT; 649 650 if (indirect) { 651 /* Now that the indirect table is filled in, map it. */ 652 dma_addr_t addr = vring_map_single( 653 vq, desc, total_sg * sizeof(struct vring_desc), 654 DMA_TO_DEVICE); 655 if (vring_mapping_error(vq, addr)) { 656 if (vq->premapped) 657 goto free_indirect; 658 659 goto unmap_release; 660 } 661 662 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 663 head, addr, 664 total_sg * sizeof(struct vring_desc), 665 VRING_DESC_F_INDIRECT, 666 false); 667 } 668 669 /* We're using some buffers from the free list. */ 670 vq->vq.num_free -= descs_used; 671 672 /* Update free pointer */ 673 if (indirect) 674 vq->free_head = vq->split.desc_extra[head].next; 675 else 676 vq->free_head = i; 677 678 /* Store token and indirect buffer state. */ 679 vq->split.desc_state[head].data = data; 680 if (indirect) 681 vq->split.desc_state[head].indir_desc = desc; 682 else 683 vq->split.desc_state[head].indir_desc = ctx; 684 685 /* Put entry in available array (but don't update avail->idx until they 686 * do sync). */ 687 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 688 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 689 690 /* Descriptors and available array need to be set before we expose the 691 * new available array entries. */ 692 virtio_wmb(vq->weak_barriers); 693 vq->split.avail_idx_shadow++; 694 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 695 vq->split.avail_idx_shadow); 696 vq->num_added++; 697 698 pr_debug("Added buffer head %i to %p\n", head, vq); 699 END_USE(vq); 700 701 /* This is very unlikely, but theoretically possible. Kick 702 * just in case. 
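 *
 * (num_added is forced back to zero via a kick at 2^16 - 1 because the
 * kick suppression logic works on 16-bit ring indices:
 * vring_need_event(event_idx, new, old) is defined as
 * (u16)(new - event_idx - 1) < (u16)(new - old), which is only
 * meaningful while new - old still fits in 16 bits.)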
*/ 703 if (unlikely(vq->num_added == (1 << 16) - 1)) 704 virtqueue_kick(_vq); 705 706 return 0; 707 708 unmap_release: 709 err_idx = i; 710 711 if (indirect) 712 i = 0; 713 else 714 i = head; 715 716 for (n = 0; n < total_sg; n++) { 717 if (i == err_idx) 718 break; 719 if (indirect) { 720 vring_unmap_one_split_indirect(vq, &desc[i]); 721 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 722 } else 723 i = vring_unmap_one_split(vq, i); 724 } 725 726 free_indirect: 727 if (indirect) 728 kfree(desc); 729 730 END_USE(vq); 731 return -ENOMEM; 732 } 733 734 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 735 { 736 struct vring_virtqueue *vq = to_vvq(_vq); 737 u16 new, old; 738 bool needs_kick; 739 740 START_USE(vq); 741 /* We need to expose available array entries before checking avail 742 * event. */ 743 virtio_mb(vq->weak_barriers); 744 745 old = vq->split.avail_idx_shadow - vq->num_added; 746 new = vq->split.avail_idx_shadow; 747 vq->num_added = 0; 748 749 LAST_ADD_TIME_CHECK(vq); 750 LAST_ADD_TIME_INVALID(vq); 751 752 if (vq->event) { 753 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 754 vring_avail_event(&vq->split.vring)), 755 new, old); 756 } else { 757 needs_kick = !(vq->split.vring.used->flags & 758 cpu_to_virtio16(_vq->vdev, 759 VRING_USED_F_NO_NOTIFY)); 760 } 761 END_USE(vq); 762 return needs_kick; 763 } 764 765 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 766 void **ctx) 767 { 768 unsigned int i, j; 769 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 770 771 /* Clear data ptr. */ 772 vq->split.desc_state[head].data = NULL; 773 774 /* Put back on free list: unmap first-level descriptors and find end */ 775 i = head; 776 777 while (vq->split.vring.desc[i].flags & nextflag) { 778 vring_unmap_one_split(vq, i); 779 i = vq->split.desc_extra[i].next; 780 vq->vq.num_free++; 781 } 782 783 vring_unmap_one_split(vq, i); 784 vq->split.desc_extra[i].next = vq->free_head; 785 vq->free_head = head; 786 787 /* Plus final descriptor */ 788 vq->vq.num_free++; 789 790 if (vq->indirect) { 791 struct vring_desc *indir_desc = 792 vq->split.desc_state[head].indir_desc; 793 u32 len; 794 795 /* Free the indirect table, if any, now that it's unmapped. */ 796 if (!indir_desc) 797 return; 798 799 len = vq->split.desc_extra[head].len; 800 801 BUG_ON(!(vq->split.desc_extra[head].flags & 802 VRING_DESC_F_INDIRECT)); 803 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 804 805 if (vq->do_unmap) { 806 for (j = 0; j < len / sizeof(struct vring_desc); j++) 807 vring_unmap_one_split_indirect(vq, &indir_desc[j]); 808 } 809 810 kfree(indir_desc); 811 vq->split.desc_state[head].indir_desc = NULL; 812 } else if (ctx) { 813 *ctx = vq->split.desc_state[head].indir_desc; 814 } 815 } 816 817 static bool more_used_split(const struct vring_virtqueue *vq) 818 { 819 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 820 vq->split.vring.used->idx); 821 } 822 823 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 824 unsigned int *len, 825 void **ctx) 826 { 827 struct vring_virtqueue *vq = to_vvq(_vq); 828 void *ret; 829 unsigned int i; 830 u16 last_used; 831 832 START_USE(vq); 833 834 if (unlikely(vq->broken)) { 835 END_USE(vq); 836 return NULL; 837 } 838 839 if (!more_used_split(vq)) { 840 pr_debug("No more buffers in queue\n"); 841 END_USE(vq); 842 return NULL; 843 } 844 845 /* Only get used array entries after they have been exposed by host. 
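 *
 * On the driver side the matching consumer is typically the virtqueue
 * callback draining the used ring, e.g. (sketch, hypothetical helper
 * name):
 *
 *	static void foo_done(struct virtqueue *vq)
 *	{
 *		unsigned int len;
 *		void *buf;
 *
 *		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *			foo_complete_request(buf, len);
 *	}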
*/ 846 virtio_rmb(vq->weak_barriers); 847 848 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 849 i = virtio32_to_cpu(_vq->vdev, 850 vq->split.vring.used->ring[last_used].id); 851 *len = virtio32_to_cpu(_vq->vdev, 852 vq->split.vring.used->ring[last_used].len); 853 854 if (unlikely(i >= vq->split.vring.num)) { 855 BAD_RING(vq, "id %u out of range\n", i); 856 return NULL; 857 } 858 if (unlikely(!vq->split.desc_state[i].data)) { 859 BAD_RING(vq, "id %u is not a head!\n", i); 860 return NULL; 861 } 862 863 /* detach_buf_split clears data, so grab it now. */ 864 ret = vq->split.desc_state[i].data; 865 detach_buf_split(vq, i, ctx); 866 vq->last_used_idx++; 867 /* If we expect an interrupt for the next entry, tell host 868 * by writing event index and flush out the write before 869 * the read in the next get_buf call. */ 870 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 871 virtio_store_mb(vq->weak_barriers, 872 &vring_used_event(&vq->split.vring), 873 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 874 875 LAST_ADD_TIME_INVALID(vq); 876 877 END_USE(vq); 878 return ret; 879 } 880 881 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 882 { 883 struct vring_virtqueue *vq = to_vvq(_vq); 884 885 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 886 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 887 888 /* 889 * If device triggered an event already it won't trigger one again: 890 * no need to disable. 891 */ 892 if (vq->event_triggered) 893 return; 894 895 if (vq->event) 896 /* TODO: this is a hack. Figure out a cleaner value to write. */ 897 vring_used_event(&vq->split.vring) = 0x0; 898 else 899 vq->split.vring.avail->flags = 900 cpu_to_virtio16(_vq->vdev, 901 vq->split.avail_flags_shadow); 902 } 903 } 904 905 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 906 { 907 struct vring_virtqueue *vq = to_vvq(_vq); 908 u16 last_used_idx; 909 910 START_USE(vq); 911 912 /* We optimistically turn back on interrupts, then check if there was 913 * more to do. */ 914 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 915 * either clear the flags bit or point the event index at the next 916 * entry. Always do both to keep code simple. */ 917 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 918 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 919 if (!vq->event) 920 vq->split.vring.avail->flags = 921 cpu_to_virtio16(_vq->vdev, 922 vq->split.avail_flags_shadow); 923 } 924 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 925 last_used_idx = vq->last_used_idx); 926 END_USE(vq); 927 return last_used_idx; 928 } 929 930 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) 931 { 932 struct vring_virtqueue *vq = to_vvq(_vq); 933 934 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 935 vq->split.vring.used->idx); 936 } 937 938 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 939 { 940 struct vring_virtqueue *vq = to_vvq(_vq); 941 u16 bufs; 942 943 START_USE(vq); 944 945 /* We optimistically turn back on interrupts, then check if there was 946 * more to do. */ 947 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 948 * either clear the flags bit or point the event index at the next 949 * entry. Always update the event index to keep code simple. 
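 *
 * Worked example of the threshold below: with event idx in use and,
 * say, 40 buffers still outstanding (avail_idx_shadow - last_used_idx),
 * bufs becomes 30, so used_event is set to last_used_idx + 30 and the
 * device need not interrupt until roughly three quarters of the
 * outstanding buffers have been consumed.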
*/ 950 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 951 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 952 if (!vq->event) 953 vq->split.vring.avail->flags = 954 cpu_to_virtio16(_vq->vdev, 955 vq->split.avail_flags_shadow); 956 } 957 /* TODO: tune this threshold */ 958 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 959 960 virtio_store_mb(vq->weak_barriers, 961 &vring_used_event(&vq->split.vring), 962 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 963 964 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 965 - vq->last_used_idx) > bufs)) { 966 END_USE(vq); 967 return false; 968 } 969 970 END_USE(vq); 971 return true; 972 } 973 974 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 975 { 976 struct vring_virtqueue *vq = to_vvq(_vq); 977 unsigned int i; 978 void *buf; 979 980 START_USE(vq); 981 982 for (i = 0; i < vq->split.vring.num; i++) { 983 if (!vq->split.desc_state[i].data) 984 continue; 985 /* detach_buf_split clears data, so grab it now. */ 986 buf = vq->split.desc_state[i].data; 987 detach_buf_split(vq, i, NULL); 988 vq->split.avail_idx_shadow--; 989 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 990 vq->split.avail_idx_shadow); 991 END_USE(vq); 992 return buf; 993 } 994 /* That should have freed everything. */ 995 BUG_ON(vq->vq.num_free != vq->split.vring.num); 996 997 END_USE(vq); 998 return NULL; 999 } 1000 1001 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, 1002 struct vring_virtqueue *vq) 1003 { 1004 struct virtio_device *vdev; 1005 1006 vdev = vq->vq.vdev; 1007 1008 vring_split->avail_flags_shadow = 0; 1009 vring_split->avail_idx_shadow = 0; 1010 1011 /* No callback? Tell other side not to bother us. */ 1012 if (!vq->vq.callback) { 1013 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 1014 if (!vq->event) 1015 vring_split->vring.avail->flags = cpu_to_virtio16(vdev, 1016 vring_split->avail_flags_shadow); 1017 } 1018 } 1019 1020 static void virtqueue_reinit_split(struct vring_virtqueue *vq) 1021 { 1022 int num; 1023 1024 num = vq->split.vring.num; 1025 1026 vq->split.vring.avail->flags = 0; 1027 vq->split.vring.avail->idx = 0; 1028 1029 /* reset avail event */ 1030 vq->split.vring.avail->ring[num] = 0; 1031 1032 vq->split.vring.used->flags = 0; 1033 vq->split.vring.used->idx = 0; 1034 1035 /* reset used event */ 1036 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; 1037 1038 virtqueue_init(vq, num); 1039 1040 virtqueue_vring_init_split(&vq->split, vq); 1041 } 1042 1043 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, 1044 struct vring_virtqueue_split *vring_split) 1045 { 1046 vq->split = *vring_split; 1047 1048 /* Put everything in free lists. 
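 *
 * The free list is threaded through desc_extra[i].next, which
 * vring_alloc_desc_extra() pre-initialises as 0 -> 1 -> ... -> num - 1,
 * so pointing free_head at slot 0 hands the whole ring to the
 * allocator; virtqueue_add_split() advances free_head as chains are
 * consumed and detach_buf_split() links them back in.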
*/ 1049 vq->free_head = 0; 1050 } 1051 1052 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) 1053 { 1054 struct vring_desc_state_split *state; 1055 struct vring_desc_extra *extra; 1056 u32 num = vring_split->vring.num; 1057 1058 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); 1059 if (!state) 1060 goto err_state; 1061 1062 extra = vring_alloc_desc_extra(num); 1063 if (!extra) 1064 goto err_extra; 1065 1066 memset(state, 0, num * sizeof(struct vring_desc_state_split)); 1067 1068 vring_split->desc_state = state; 1069 vring_split->desc_extra = extra; 1070 return 0; 1071 1072 err_extra: 1073 kfree(state); 1074 err_state: 1075 return -ENOMEM; 1076 } 1077 1078 static void vring_free_split(struct vring_virtqueue_split *vring_split, 1079 struct virtio_device *vdev, struct device *dma_dev) 1080 { 1081 vring_free_queue(vdev, vring_split->queue_size_in_bytes, 1082 vring_split->vring.desc, 1083 vring_split->queue_dma_addr, 1084 dma_dev); 1085 1086 kfree(vring_split->desc_state); 1087 kfree(vring_split->desc_extra); 1088 } 1089 1090 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, 1091 struct virtio_device *vdev, 1092 u32 num, 1093 unsigned int vring_align, 1094 bool may_reduce_num, 1095 struct device *dma_dev) 1096 { 1097 void *queue = NULL; 1098 dma_addr_t dma_addr; 1099 1100 /* We assume num is a power of 2. */ 1101 if (!is_power_of_2(num)) { 1102 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1103 return -EINVAL; 1104 } 1105 1106 /* TODO: allocate each queue chunk individually */ 1107 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1108 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1109 &dma_addr, 1110 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1111 dma_dev); 1112 if (queue) 1113 break; 1114 if (!may_reduce_num) 1115 return -ENOMEM; 1116 } 1117 1118 if (!num) 1119 return -ENOMEM; 1120 1121 if (!queue) { 1122 /* Try to get a single page. You are my only hope! 
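 *
 * For reference, vring_size() from include/uapi/linux/virtio_ring.h is
 * roughly
 *
 *	((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
 *	  + align - 1) & ~(align - 1))
 *	 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num
 *
 * i.e. descriptor table plus avail ring rounded up to vring_align,
 * followed by the used ring; that is what the loop above compares
 * against PAGE_SIZE while halving num.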
*/ 1123 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1124 &dma_addr, GFP_KERNEL | __GFP_ZERO, 1125 dma_dev); 1126 } 1127 if (!queue) 1128 return -ENOMEM; 1129 1130 vring_init(&vring_split->vring, num, queue, vring_align); 1131 1132 vring_split->queue_dma_addr = dma_addr; 1133 vring_split->queue_size_in_bytes = vring_size(num, vring_align); 1134 1135 vring_split->vring_align = vring_align; 1136 vring_split->may_reduce_num = may_reduce_num; 1137 1138 return 0; 1139 } 1140 1141 static struct virtqueue *vring_create_virtqueue_split( 1142 unsigned int index, 1143 unsigned int num, 1144 unsigned int vring_align, 1145 struct virtio_device *vdev, 1146 bool weak_barriers, 1147 bool may_reduce_num, 1148 bool context, 1149 bool (*notify)(struct virtqueue *), 1150 void (*callback)(struct virtqueue *), 1151 const char *name, 1152 struct device *dma_dev) 1153 { 1154 struct vring_virtqueue_split vring_split = {}; 1155 struct virtqueue *vq; 1156 int err; 1157 1158 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, 1159 may_reduce_num, dma_dev); 1160 if (err) 1161 return NULL; 1162 1163 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 1164 context, notify, callback, name, dma_dev); 1165 if (!vq) { 1166 vring_free_split(&vring_split, vdev, dma_dev); 1167 return NULL; 1168 } 1169 1170 to_vvq(vq)->we_own_ring = true; 1171 1172 return vq; 1173 } 1174 1175 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) 1176 { 1177 struct vring_virtqueue_split vring_split = {}; 1178 struct vring_virtqueue *vq = to_vvq(_vq); 1179 struct virtio_device *vdev = _vq->vdev; 1180 int err; 1181 1182 err = vring_alloc_queue_split(&vring_split, vdev, num, 1183 vq->split.vring_align, 1184 vq->split.may_reduce_num, 1185 vring_dma_dev(vq)); 1186 if (err) 1187 goto err; 1188 1189 err = vring_alloc_state_extra_split(&vring_split); 1190 if (err) 1191 goto err_state_extra; 1192 1193 vring_free(&vq->vq); 1194 1195 virtqueue_vring_init_split(&vring_split, vq); 1196 1197 virtqueue_init(vq, vring_split.vring.num); 1198 virtqueue_vring_attach_split(vq, &vring_split); 1199 1200 return 0; 1201 1202 err_state_extra: 1203 vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); 1204 err: 1205 virtqueue_reinit_split(vq); 1206 return -ENOMEM; 1207 } 1208 1209 1210 /* 1211 * Packed ring specific functions - *_packed(). 1212 */ 1213 static bool packed_used_wrap_counter(u16 last_used_idx) 1214 { 1215 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1216 } 1217 1218 static u16 packed_last_used(u16 last_used_idx) 1219 { 1220 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1221 } 1222 1223 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1224 const struct vring_desc_extra *extra) 1225 { 1226 u16 flags; 1227 1228 flags = extra->flags; 1229 1230 if (flags & VRING_DESC_F_INDIRECT) { 1231 if (!vq->use_dma_api) 1232 return; 1233 1234 dma_unmap_single(vring_dma_dev(vq), 1235 extra->addr, extra->len, 1236 (flags & VRING_DESC_F_WRITE) ? 1237 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1238 } else { 1239 if (!vq->do_unmap) 1240 return; 1241 1242 dma_unmap_page(vring_dma_dev(vq), 1243 extra->addr, extra->len, 1244 (flags & VRING_DESC_F_WRITE) ? 
1245 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1246 } 1247 } 1248 1249 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1250 const struct vring_packed_desc *desc) 1251 { 1252 u16 flags; 1253 1254 if (!vq->do_unmap) 1255 return; 1256 1257 flags = le16_to_cpu(desc->flags); 1258 1259 dma_unmap_page(vring_dma_dev(vq), 1260 le64_to_cpu(desc->addr), 1261 le32_to_cpu(desc->len), 1262 (flags & VRING_DESC_F_WRITE) ? 1263 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1264 } 1265 1266 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1267 gfp_t gfp) 1268 { 1269 struct vring_packed_desc *desc; 1270 1271 /* 1272 * We require lowmem mappings for the descriptors because 1273 * otherwise virt_to_phys will give us bogus addresses in the 1274 * virtqueue. 1275 */ 1276 gfp &= ~__GFP_HIGHMEM; 1277 1278 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1279 1280 return desc; 1281 } 1282 1283 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1284 struct scatterlist *sgs[], 1285 unsigned int total_sg, 1286 unsigned int out_sgs, 1287 unsigned int in_sgs, 1288 void *data, 1289 gfp_t gfp) 1290 { 1291 struct vring_packed_desc *desc; 1292 struct scatterlist *sg; 1293 unsigned int i, n, err_idx; 1294 u16 head, id; 1295 dma_addr_t addr; 1296 1297 head = vq->packed.next_avail_idx; 1298 desc = alloc_indirect_packed(total_sg, gfp); 1299 if (!desc) 1300 return -ENOMEM; 1301 1302 if (unlikely(vq->vq.num_free < 1)) { 1303 pr_debug("Can't add buf len 1 - avail = 0\n"); 1304 kfree(desc); 1305 END_USE(vq); 1306 return -ENOSPC; 1307 } 1308 1309 i = 0; 1310 id = vq->free_head; 1311 BUG_ON(id == vq->packed.vring.num); 1312 1313 for (n = 0; n < out_sgs + in_sgs; n++) { 1314 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1315 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1316 DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) 1317 goto unmap_release; 1318 1319 desc[i].flags = cpu_to_le16(n < out_sgs ? 1320 0 : VRING_DESC_F_WRITE); 1321 desc[i].addr = cpu_to_le64(addr); 1322 desc[i].len = cpu_to_le32(sg->length); 1323 i++; 1324 } 1325 } 1326 1327 /* Now that the indirect table is filled in, map it. */ 1328 addr = vring_map_single(vq, desc, 1329 total_sg * sizeof(struct vring_packed_desc), 1330 DMA_TO_DEVICE); 1331 if (vring_mapping_error(vq, addr)) { 1332 if (vq->premapped) 1333 goto free_desc; 1334 1335 goto unmap_release; 1336 } 1337 1338 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1339 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1340 sizeof(struct vring_packed_desc)); 1341 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1342 1343 if (vq->use_dma_api) { 1344 vq->packed.desc_extra[id].addr = addr; 1345 vq->packed.desc_extra[id].len = total_sg * 1346 sizeof(struct vring_packed_desc); 1347 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1348 vq->packed.avail_used_flags; 1349 } 1350 1351 /* 1352 * A driver MUST NOT make the first descriptor in the list 1353 * available before all subsequent descriptors comprising 1354 * the list are made available. 1355 */ 1356 virtio_wmb(vq->weak_barriers); 1357 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1358 vq->packed.avail_used_flags); 1359 1360 /* We're using some buffers from the free list. 
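 *
 * Only one ring slot is consumed here no matter how large total_sg is:
 * the total_sg data descriptors live in the separately allocated
 * indirect table, which is unmapped and kfree()d in detach_buf_packed()
 * once the device returns the buffer.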
*/ 1361 vq->vq.num_free -= 1; 1362 1363 /* Update free pointer */ 1364 n = head + 1; 1365 if (n >= vq->packed.vring.num) { 1366 n = 0; 1367 vq->packed.avail_wrap_counter ^= 1; 1368 vq->packed.avail_used_flags ^= 1369 1 << VRING_PACKED_DESC_F_AVAIL | 1370 1 << VRING_PACKED_DESC_F_USED; 1371 } 1372 vq->packed.next_avail_idx = n; 1373 vq->free_head = vq->packed.desc_extra[id].next; 1374 1375 /* Store token and indirect buffer state. */ 1376 vq->packed.desc_state[id].num = 1; 1377 vq->packed.desc_state[id].data = data; 1378 vq->packed.desc_state[id].indir_desc = desc; 1379 vq->packed.desc_state[id].last = id; 1380 1381 vq->num_added += 1; 1382 1383 pr_debug("Added buffer head %i to %p\n", head, vq); 1384 END_USE(vq); 1385 1386 return 0; 1387 1388 unmap_release: 1389 err_idx = i; 1390 1391 for (i = 0; i < err_idx; i++) 1392 vring_unmap_desc_packed(vq, &desc[i]); 1393 1394 free_desc: 1395 kfree(desc); 1396 1397 END_USE(vq); 1398 return -ENOMEM; 1399 } 1400 1401 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1402 struct scatterlist *sgs[], 1403 unsigned int total_sg, 1404 unsigned int out_sgs, 1405 unsigned int in_sgs, 1406 void *data, 1407 void *ctx, 1408 gfp_t gfp) 1409 { 1410 struct vring_virtqueue *vq = to_vvq(_vq); 1411 struct vring_packed_desc *desc; 1412 struct scatterlist *sg; 1413 unsigned int i, n, c, descs_used, err_idx; 1414 __le16 head_flags, flags; 1415 u16 head, id, prev, curr, avail_used_flags; 1416 int err; 1417 1418 START_USE(vq); 1419 1420 BUG_ON(data == NULL); 1421 BUG_ON(ctx && vq->indirect); 1422 1423 if (unlikely(vq->broken)) { 1424 END_USE(vq); 1425 return -EIO; 1426 } 1427 1428 LAST_ADD_TIME_UPDATE(vq); 1429 1430 BUG_ON(total_sg == 0); 1431 1432 if (virtqueue_use_indirect(vq, total_sg)) { 1433 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1434 in_sgs, data, gfp); 1435 if (err != -ENOMEM) { 1436 END_USE(vq); 1437 return err; 1438 } 1439 1440 /* fall back on direct */ 1441 } 1442 1443 head = vq->packed.next_avail_idx; 1444 avail_used_flags = vq->packed.avail_used_flags; 1445 1446 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1447 1448 desc = vq->packed.vring.desc; 1449 i = head; 1450 descs_used = total_sg; 1451 1452 if (unlikely(vq->vq.num_free < descs_used)) { 1453 pr_debug("Can't add buf len %i - avail = %i\n", 1454 descs_used, vq->vq.num_free); 1455 END_USE(vq); 1456 return -ENOSPC; 1457 } 1458 1459 id = vq->free_head; 1460 BUG_ON(id == vq->packed.vring.num); 1461 1462 curr = id; 1463 c = 0; 1464 for (n = 0; n < out_sgs + in_sgs; n++) { 1465 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1466 dma_addr_t addr; 1467 1468 if (vring_map_one_sg(vq, sg, n < out_sgs ? 1469 DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) 1470 goto unmap_release; 1471 1472 flags = cpu_to_le16(vq->packed.avail_used_flags | 1473 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1474 (n < out_sgs ? 
0 : VRING_DESC_F_WRITE)); 1475 if (i == head) 1476 head_flags = flags; 1477 else 1478 desc[i].flags = flags; 1479 1480 desc[i].addr = cpu_to_le64(addr); 1481 desc[i].len = cpu_to_le32(sg->length); 1482 desc[i].id = cpu_to_le16(id); 1483 1484 if (unlikely(vq->use_dma_api)) { 1485 vq->packed.desc_extra[curr].addr = addr; 1486 vq->packed.desc_extra[curr].len = sg->length; 1487 vq->packed.desc_extra[curr].flags = 1488 le16_to_cpu(flags); 1489 } 1490 prev = curr; 1491 curr = vq->packed.desc_extra[curr].next; 1492 1493 if ((unlikely(++i >= vq->packed.vring.num))) { 1494 i = 0; 1495 vq->packed.avail_used_flags ^= 1496 1 << VRING_PACKED_DESC_F_AVAIL | 1497 1 << VRING_PACKED_DESC_F_USED; 1498 } 1499 } 1500 } 1501 1502 if (i <= head) 1503 vq->packed.avail_wrap_counter ^= 1; 1504 1505 /* We're using some buffers from the free list. */ 1506 vq->vq.num_free -= descs_used; 1507 1508 /* Update free pointer */ 1509 vq->packed.next_avail_idx = i; 1510 vq->free_head = curr; 1511 1512 /* Store token. */ 1513 vq->packed.desc_state[id].num = descs_used; 1514 vq->packed.desc_state[id].data = data; 1515 vq->packed.desc_state[id].indir_desc = ctx; 1516 vq->packed.desc_state[id].last = prev; 1517 1518 /* 1519 * A driver MUST NOT make the first descriptor in the list 1520 * available before all subsequent descriptors comprising 1521 * the list are made available. 1522 */ 1523 virtio_wmb(vq->weak_barriers); 1524 vq->packed.vring.desc[head].flags = head_flags; 1525 vq->num_added += descs_used; 1526 1527 pr_debug("Added buffer head %i to %p\n", head, vq); 1528 END_USE(vq); 1529 1530 return 0; 1531 1532 unmap_release: 1533 err_idx = i; 1534 i = head; 1535 curr = vq->free_head; 1536 1537 vq->packed.avail_used_flags = avail_used_flags; 1538 1539 for (n = 0; n < total_sg; n++) { 1540 if (i == err_idx) 1541 break; 1542 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1543 curr = vq->packed.desc_extra[curr].next; 1544 i++; 1545 if (i >= vq->packed.vring.num) 1546 i = 0; 1547 } 1548 1549 END_USE(vq); 1550 return -EIO; 1551 } 1552 1553 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1554 { 1555 struct vring_virtqueue *vq = to_vvq(_vq); 1556 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1557 bool needs_kick; 1558 union { 1559 struct { 1560 __le16 off_wrap; 1561 __le16 flags; 1562 }; 1563 u32 u32; 1564 } snapshot; 1565 1566 START_USE(vq); 1567 1568 /* 1569 * We need to expose the new flags value before checking notification 1570 * suppressions. 
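 *
 * Callers normally pair this prepare step with the notify step; the
 * generic helper does
 *
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);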
1571 */ 1572 virtio_mb(vq->weak_barriers); 1573 1574 old = vq->packed.next_avail_idx - vq->num_added; 1575 new = vq->packed.next_avail_idx; 1576 vq->num_added = 0; 1577 1578 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1579 flags = le16_to_cpu(snapshot.flags); 1580 1581 LAST_ADD_TIME_CHECK(vq); 1582 LAST_ADD_TIME_INVALID(vq); 1583 1584 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1585 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1586 goto out; 1587 } 1588 1589 off_wrap = le16_to_cpu(snapshot.off_wrap); 1590 1591 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1592 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1593 if (wrap_counter != vq->packed.avail_wrap_counter) 1594 event_idx -= vq->packed.vring.num; 1595 1596 needs_kick = vring_need_event(event_idx, new, old); 1597 out: 1598 END_USE(vq); 1599 return needs_kick; 1600 } 1601 1602 static void detach_buf_packed(struct vring_virtqueue *vq, 1603 unsigned int id, void **ctx) 1604 { 1605 struct vring_desc_state_packed *state = NULL; 1606 struct vring_packed_desc *desc; 1607 unsigned int i, curr; 1608 1609 state = &vq->packed.desc_state[id]; 1610 1611 /* Clear data ptr. */ 1612 state->data = NULL; 1613 1614 vq->packed.desc_extra[state->last].next = vq->free_head; 1615 vq->free_head = id; 1616 vq->vq.num_free += state->num; 1617 1618 if (unlikely(vq->use_dma_api)) { 1619 curr = id; 1620 for (i = 0; i < state->num; i++) { 1621 vring_unmap_extra_packed(vq, 1622 &vq->packed.desc_extra[curr]); 1623 curr = vq->packed.desc_extra[curr].next; 1624 } 1625 } 1626 1627 if (vq->indirect) { 1628 u32 len; 1629 1630 /* Free the indirect table, if any, now that it's unmapped. */ 1631 desc = state->indir_desc; 1632 if (!desc) 1633 return; 1634 1635 if (vq->do_unmap) { 1636 len = vq->packed.desc_extra[id].len; 1637 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1638 i++) 1639 vring_unmap_desc_packed(vq, &desc[i]); 1640 } 1641 kfree(desc); 1642 state->indir_desc = NULL; 1643 } else if (ctx) { 1644 *ctx = state->indir_desc; 1645 } 1646 } 1647 1648 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1649 u16 idx, bool used_wrap_counter) 1650 { 1651 bool avail, used; 1652 u16 flags; 1653 1654 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1655 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1656 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1657 1658 return avail == used && used == used_wrap_counter; 1659 } 1660 1661 static bool more_used_packed(const struct vring_virtqueue *vq) 1662 { 1663 u16 last_used; 1664 u16 last_used_idx; 1665 bool used_wrap_counter; 1666 1667 last_used_idx = READ_ONCE(vq->last_used_idx); 1668 last_used = packed_last_used(last_used_idx); 1669 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1670 return is_used_desc_packed(vq, last_used, used_wrap_counter); 1671 } 1672 1673 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1674 unsigned int *len, 1675 void **ctx) 1676 { 1677 struct vring_virtqueue *vq = to_vvq(_vq); 1678 u16 last_used, id, last_used_idx; 1679 bool used_wrap_counter; 1680 void *ret; 1681 1682 START_USE(vq); 1683 1684 if (unlikely(vq->broken)) { 1685 END_USE(vq); 1686 return NULL; 1687 } 1688 1689 if (!more_used_packed(vq)) { 1690 pr_debug("No more buffers in queue\n"); 1691 END_USE(vq); 1692 return NULL; 1693 } 1694 1695 /* Only get used elements after they have been exposed by host. 
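 *
 * A packed descriptor counts as used once its AVAIL and USED flag bits
 * both equal the driver's used wrap counter: on the first lap
 * (counter == 1) the device sets both bits, and after last_used wraps
 * past the ring end the counter flips to 0, so the same slot reads as
 * used again when both bits come back cleared.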
*/ 1696 virtio_rmb(vq->weak_barriers); 1697 1698 last_used_idx = READ_ONCE(vq->last_used_idx); 1699 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1700 last_used = packed_last_used(last_used_idx); 1701 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1702 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1703 1704 if (unlikely(id >= vq->packed.vring.num)) { 1705 BAD_RING(vq, "id %u out of range\n", id); 1706 return NULL; 1707 } 1708 if (unlikely(!vq->packed.desc_state[id].data)) { 1709 BAD_RING(vq, "id %u is not a head!\n", id); 1710 return NULL; 1711 } 1712 1713 /* detach_buf_packed clears data, so grab it now. */ 1714 ret = vq->packed.desc_state[id].data; 1715 detach_buf_packed(vq, id, ctx); 1716 1717 last_used += vq->packed.desc_state[id].num; 1718 if (unlikely(last_used >= vq->packed.vring.num)) { 1719 last_used -= vq->packed.vring.num; 1720 used_wrap_counter ^= 1; 1721 } 1722 1723 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1724 WRITE_ONCE(vq->last_used_idx, last_used); 1725 1726 /* 1727 * If we expect an interrupt for the next entry, tell host 1728 * by writing event index and flush out the write before 1729 * the read in the next get_buf call. 1730 */ 1731 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1732 virtio_store_mb(vq->weak_barriers, 1733 &vq->packed.vring.driver->off_wrap, 1734 cpu_to_le16(vq->last_used_idx)); 1735 1736 LAST_ADD_TIME_INVALID(vq); 1737 1738 END_USE(vq); 1739 return ret; 1740 } 1741 1742 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1743 { 1744 struct vring_virtqueue *vq = to_vvq(_vq); 1745 1746 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1747 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1748 1749 /* 1750 * If device triggered an event already it won't trigger one again: 1751 * no need to disable. 1752 */ 1753 if (vq->event_triggered) 1754 return; 1755 1756 vq->packed.vring.driver->flags = 1757 cpu_to_le16(vq->packed.event_flags_shadow); 1758 } 1759 } 1760 1761 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1762 { 1763 struct vring_virtqueue *vq = to_vvq(_vq); 1764 1765 START_USE(vq); 1766 1767 /* 1768 * We optimistically turn back on interrupts, then check if there was 1769 * more to do. 1770 */ 1771 1772 if (vq->event) { 1773 vq->packed.vring.driver->off_wrap = 1774 cpu_to_le16(vq->last_used_idx); 1775 /* 1776 * We need to update event offset and event wrap 1777 * counter first before updating event flags. 1778 */ 1779 virtio_wmb(vq->weak_barriers); 1780 } 1781 1782 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1783 vq->packed.event_flags_shadow = vq->event ? 
1784 VRING_PACKED_EVENT_FLAG_DESC : 1785 VRING_PACKED_EVENT_FLAG_ENABLE; 1786 vq->packed.vring.driver->flags = 1787 cpu_to_le16(vq->packed.event_flags_shadow); 1788 } 1789 1790 END_USE(vq); 1791 return vq->last_used_idx; 1792 } 1793 1794 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1795 { 1796 struct vring_virtqueue *vq = to_vvq(_vq); 1797 bool wrap_counter; 1798 u16 used_idx; 1799 1800 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1801 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1802 1803 return is_used_desc_packed(vq, used_idx, wrap_counter); 1804 } 1805 1806 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1807 { 1808 struct vring_virtqueue *vq = to_vvq(_vq); 1809 u16 used_idx, wrap_counter, last_used_idx; 1810 u16 bufs; 1811 1812 START_USE(vq); 1813 1814 /* 1815 * We optimistically turn back on interrupts, then check if there was 1816 * more to do. 1817 */ 1818 1819 if (vq->event) { 1820 /* TODO: tune this threshold */ 1821 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1822 last_used_idx = READ_ONCE(vq->last_used_idx); 1823 wrap_counter = packed_used_wrap_counter(last_used_idx); 1824 1825 used_idx = packed_last_used(last_used_idx) + bufs; 1826 if (used_idx >= vq->packed.vring.num) { 1827 used_idx -= vq->packed.vring.num; 1828 wrap_counter ^= 1; 1829 } 1830 1831 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1832 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1833 1834 /* 1835 * We need to update event offset and event wrap 1836 * counter first before updating event flags. 1837 */ 1838 virtio_wmb(vq->weak_barriers); 1839 } 1840 1841 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1842 vq->packed.event_flags_shadow = vq->event ? 1843 VRING_PACKED_EVENT_FLAG_DESC : 1844 VRING_PACKED_EVENT_FLAG_ENABLE; 1845 vq->packed.vring.driver->flags = 1846 cpu_to_le16(vq->packed.event_flags_shadow); 1847 } 1848 1849 /* 1850 * We need to update event suppression structure first 1851 * before re-checking for more used buffers. 1852 */ 1853 virtio_mb(vq->weak_barriers); 1854 1855 last_used_idx = READ_ONCE(vq->last_used_idx); 1856 wrap_counter = packed_used_wrap_counter(last_used_idx); 1857 used_idx = packed_last_used(last_used_idx); 1858 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1859 END_USE(vq); 1860 return false; 1861 } 1862 1863 END_USE(vq); 1864 return true; 1865 } 1866 1867 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1868 { 1869 struct vring_virtqueue *vq = to_vvq(_vq); 1870 unsigned int i; 1871 void *buf; 1872 1873 START_USE(vq); 1874 1875 for (i = 0; i < vq->packed.vring.num; i++) { 1876 if (!vq->packed.desc_state[i].data) 1877 continue; 1878 /* detach_buf clears data, so grab it now. */ 1879 buf = vq->packed.desc_state[i].data; 1880 detach_buf_packed(vq, i, NULL); 1881 END_USE(vq); 1882 return buf; 1883 } 1884 /* That should have freed everything. 
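 *
 * Drivers call this only once the device has been stopped, typically on
 * the remove or freeze path, e.g. (sketch, with a driver-owned free
 * helper):
 *
 *	virtio_reset_device(vdev);
 *	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *		foo_free_buf(buf);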
*/ 1885 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1886 1887 END_USE(vq); 1888 return NULL; 1889 } 1890 1891 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) 1892 { 1893 struct vring_desc_extra *desc_extra; 1894 unsigned int i; 1895 1896 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1897 GFP_KERNEL); 1898 if (!desc_extra) 1899 return NULL; 1900 1901 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1902 1903 for (i = 0; i < num - 1; i++) 1904 desc_extra[i].next = i + 1; 1905 1906 return desc_extra; 1907 } 1908 1909 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, 1910 struct virtio_device *vdev, 1911 struct device *dma_dev) 1912 { 1913 if (vring_packed->vring.desc) 1914 vring_free_queue(vdev, vring_packed->ring_size_in_bytes, 1915 vring_packed->vring.desc, 1916 vring_packed->ring_dma_addr, 1917 dma_dev); 1918 1919 if (vring_packed->vring.driver) 1920 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 1921 vring_packed->vring.driver, 1922 vring_packed->driver_event_dma_addr, 1923 dma_dev); 1924 1925 if (vring_packed->vring.device) 1926 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 1927 vring_packed->vring.device, 1928 vring_packed->device_event_dma_addr, 1929 dma_dev); 1930 1931 kfree(vring_packed->desc_state); 1932 kfree(vring_packed->desc_extra); 1933 } 1934 1935 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, 1936 struct virtio_device *vdev, 1937 u32 num, struct device *dma_dev) 1938 { 1939 struct vring_packed_desc *ring; 1940 struct vring_packed_desc_event *driver, *device; 1941 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1942 size_t ring_size_in_bytes, event_size_in_bytes; 1943 1944 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1945 1946 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1947 &ring_dma_addr, 1948 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1949 dma_dev); 1950 if (!ring) 1951 goto err; 1952 1953 vring_packed->vring.desc = ring; 1954 vring_packed->ring_dma_addr = ring_dma_addr; 1955 vring_packed->ring_size_in_bytes = ring_size_in_bytes; 1956 1957 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1958 1959 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1960 &driver_event_dma_addr, 1961 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1962 dma_dev); 1963 if (!driver) 1964 goto err; 1965 1966 vring_packed->vring.driver = driver; 1967 vring_packed->event_size_in_bytes = event_size_in_bytes; 1968 vring_packed->driver_event_dma_addr = driver_event_dma_addr; 1969 1970 device = vring_alloc_queue(vdev, event_size_in_bytes, 1971 &device_event_dma_addr, 1972 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1973 dma_dev); 1974 if (!device) 1975 goto err; 1976 1977 vring_packed->vring.device = device; 1978 vring_packed->device_event_dma_addr = device_event_dma_addr; 1979 1980 vring_packed->vring.num = num; 1981 1982 return 0; 1983 1984 err: 1985 vring_free_packed(vring_packed, vdev, dma_dev); 1986 return -ENOMEM; 1987 } 1988 1989 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) 1990 { 1991 struct vring_desc_state_packed *state; 1992 struct vring_desc_extra *extra; 1993 u32 num = vring_packed->vring.num; 1994 1995 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); 1996 if (!state) 1997 goto err_desc_state; 1998 1999 memset(state, 0, num * sizeof(struct vring_desc_state_packed)); 2000 2001 extra = vring_alloc_desc_extra(num); 2002 if (!extra) 
2003 goto err_desc_extra; 2004 2005 vring_packed->desc_state = state; 2006 vring_packed->desc_extra = extra; 2007 2008 return 0; 2009 2010 err_desc_extra: 2011 kfree(state); 2012 err_desc_state: 2013 return -ENOMEM; 2014 } 2015 2016 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, 2017 bool callback) 2018 { 2019 vring_packed->next_avail_idx = 0; 2020 vring_packed->avail_wrap_counter = 1; 2021 vring_packed->event_flags_shadow = 0; 2022 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 2023 2024 /* No callback? Tell other side not to bother us. */ 2025 if (!callback) { 2026 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 2027 vring_packed->vring.driver->flags = 2028 cpu_to_le16(vring_packed->event_flags_shadow); 2029 } 2030 } 2031 2032 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, 2033 struct vring_virtqueue_packed *vring_packed) 2034 { 2035 vq->packed = *vring_packed; 2036 2037 /* Put everything in free lists. */ 2038 vq->free_head = 0; 2039 } 2040 2041 static void virtqueue_reinit_packed(struct vring_virtqueue *vq) 2042 { 2043 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); 2044 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); 2045 2046 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */ 2047 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); 2048 2049 virtqueue_init(vq, vq->packed.vring.num); 2050 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); 2051 } 2052 2053 static struct virtqueue *vring_create_virtqueue_packed( 2054 unsigned int index, 2055 unsigned int num, 2056 unsigned int vring_align, 2057 struct virtio_device *vdev, 2058 bool weak_barriers, 2059 bool may_reduce_num, 2060 bool context, 2061 bool (*notify)(struct virtqueue *), 2062 void (*callback)(struct virtqueue *), 2063 const char *name, 2064 struct device *dma_dev) 2065 { 2066 struct vring_virtqueue_packed vring_packed = {}; 2067 struct vring_virtqueue *vq; 2068 int err; 2069 2070 if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) 2071 goto err_ring; 2072 2073 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2074 if (!vq) 2075 goto err_vq; 2076 2077 vq->vq.callback = callback; 2078 vq->vq.vdev = vdev; 2079 vq->vq.name = name; 2080 vq->vq.index = index; 2081 vq->vq.reset = false; 2082 vq->we_own_ring = true; 2083 vq->notify = notify; 2084 vq->weak_barriers = weak_barriers; 2085 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2086 vq->broken = true; 2087 #else 2088 vq->broken = false; 2089 #endif 2090 vq->packed_ring = true; 2091 vq->dma_dev = dma_dev; 2092 vq->use_dma_api = vring_use_dma_api(vdev); 2093 vq->premapped = false; 2094 vq->do_unmap = vq->use_dma_api; 2095 2096 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2097 !context; 2098 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2099 2100 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2101 vq->weak_barriers = false; 2102 2103 err = vring_alloc_state_extra_packed(&vring_packed); 2104 if (err) 2105 goto err_state_extra; 2106 2107 virtqueue_vring_init_packed(&vring_packed, !!callback); 2108 2109 virtqueue_init(vq, num); 2110 virtqueue_vring_attach_packed(vq, &vring_packed); 2111 2112 spin_lock(&vdev->vqs_list_lock); 2113 list_add_tail(&vq->vq.list, &vdev->vqs); 2114 spin_unlock(&vdev->vqs_list_lock); 2115 return &vq->vq; 2116 2117 err_state_extra: 2118 kfree(vq); 2119 err_vq: 2120 vring_free_packed(&vring_packed, vdev, dma_dev); 2121 err_ring: 2122 
return NULL; 2123 } 2124 2125 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) 2126 { 2127 struct vring_virtqueue_packed vring_packed = {}; 2128 struct vring_virtqueue *vq = to_vvq(_vq); 2129 struct virtio_device *vdev = _vq->vdev; 2130 int err; 2131 2132 if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) 2133 goto err_ring; 2134 2135 err = vring_alloc_state_extra_packed(&vring_packed); 2136 if (err) 2137 goto err_state_extra; 2138 2139 vring_free(&vq->vq); 2140 2141 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); 2142 2143 virtqueue_init(vq, vring_packed.vring.num); 2144 virtqueue_vring_attach_packed(vq, &vring_packed); 2145 2146 return 0; 2147 2148 err_state_extra: 2149 vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); 2150 err_ring: 2151 virtqueue_reinit_packed(vq); 2152 return -ENOMEM; 2153 } 2154 2155 static int virtqueue_disable_and_recycle(struct virtqueue *_vq, 2156 void (*recycle)(struct virtqueue *vq, void *buf)) 2157 { 2158 struct vring_virtqueue *vq = to_vvq(_vq); 2159 struct virtio_device *vdev = vq->vq.vdev; 2160 void *buf; 2161 int err; 2162 2163 if (!vq->we_own_ring) 2164 return -EPERM; 2165 2166 if (!vdev->config->disable_vq_and_reset) 2167 return -ENOENT; 2168 2169 if (!vdev->config->enable_vq_after_reset) 2170 return -ENOENT; 2171 2172 err = vdev->config->disable_vq_and_reset(_vq); 2173 if (err) 2174 return err; 2175 2176 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) 2177 recycle(_vq, buf); 2178 2179 return 0; 2180 } 2181 2182 static int virtqueue_enable_after_reset(struct virtqueue *_vq) 2183 { 2184 struct vring_virtqueue *vq = to_vvq(_vq); 2185 struct virtio_device *vdev = vq->vq.vdev; 2186 2187 if (vdev->config->enable_vq_after_reset(_vq)) 2188 return -EBUSY; 2189 2190 return 0; 2191 } 2192 2193 /* 2194 * Generic functions and exported symbols. 2195 */ 2196 2197 static inline int virtqueue_add(struct virtqueue *_vq, 2198 struct scatterlist *sgs[], 2199 unsigned int total_sg, 2200 unsigned int out_sgs, 2201 unsigned int in_sgs, 2202 void *data, 2203 void *ctx, 2204 gfp_t gfp) 2205 { 2206 struct vring_virtqueue *vq = to_vvq(_vq); 2207 2208 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 2209 out_sgs, in_sgs, data, ctx, gfp) : 2210 virtqueue_add_split(_vq, sgs, total_sg, 2211 out_sgs, in_sgs, data, ctx, gfp); 2212 } 2213 2214 /** 2215 * virtqueue_add_sgs - expose buffers to other end 2216 * @_vq: the struct virtqueue we're talking about. 2217 * @sgs: array of terminated scatterlists. 2218 * @out_sgs: the number of scatterlists readable by other side 2219 * @in_sgs: the number of scatterlists which are writable (after readable ones) 2220 * @data: the token identifying the buffer. 2221 * @gfp: how to do memory allocations (if necessary). 2222 * 2223 * Caller must ensure we don't call this with other virtqueue operations 2224 * at the same time (except where noted). 2225 * 2226 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2227 */ 2228 int virtqueue_add_sgs(struct virtqueue *_vq, 2229 struct scatterlist *sgs[], 2230 unsigned int out_sgs, 2231 unsigned int in_sgs, 2232 void *data, 2233 gfp_t gfp) 2234 { 2235 unsigned int i, total_sg = 0; 2236 2237 /* Count them first. 
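 *
 * (Usage sketch, for illustration only and not part of the original file:
 *  a typical caller builds the @sgs array from one device-readable and one
 *  device-writable scatterlist and then kicks the queue, e.g.
 *
 *	struct scatterlist hdr, status;
 *	struct scatterlist *sgs[] = { &hdr, &status };
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	if (!virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC))
 *		virtqueue_kick(vq);
 *
 *  Here "req" and its hdr/status members are hypothetical driver-side
 *  objects, and error handling is omitted.)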
 */
	for (i = 0; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;

		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_sg++;
	}
	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
			     data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);

/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist *sg, unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);

/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist *sg, unsigned int num,
			void *data,
			gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);

/**
 * virtqueue_add_inbuf_ctx - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @ctx: extra context for the token
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
 */
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
			    struct scatterlist *sg, unsigned int num,
			    void *data,
			    void *ctx,
			    gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);

/**
 * virtqueue_dma_dev - get the DMA device
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns the DMA device, which can be used with the DMA API.
 */
struct device *virtqueue_dma_dev(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->use_dma_api)
		return vring_dma_dev(vq);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(virtqueue_dma_dev);

/**
 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2336 * @_vq: the struct virtqueue 2337 * 2338 * Instead of virtqueue_kick(), you can do: 2339 * if (virtqueue_kick_prepare(vq)) 2340 * virtqueue_notify(vq); 2341 * 2342 * This is sometimes useful because the virtqueue_kick_prepare() needs 2343 * to be serialized, but the actual virtqueue_notify() call does not. 2344 */ 2345 bool virtqueue_kick_prepare(struct virtqueue *_vq) 2346 { 2347 struct vring_virtqueue *vq = to_vvq(_vq); 2348 2349 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 2350 virtqueue_kick_prepare_split(_vq); 2351 } 2352 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 2353 2354 /** 2355 * virtqueue_notify - second half of split virtqueue_kick call. 2356 * @_vq: the struct virtqueue 2357 * 2358 * This does not need to be serialized. 2359 * 2360 * Returns false if host notify failed or queue is broken, otherwise true. 2361 */ 2362 bool virtqueue_notify(struct virtqueue *_vq) 2363 { 2364 struct vring_virtqueue *vq = to_vvq(_vq); 2365 2366 if (unlikely(vq->broken)) 2367 return false; 2368 2369 /* Prod other side to tell it about changes. */ 2370 if (!vq->notify(_vq)) { 2371 vq->broken = true; 2372 return false; 2373 } 2374 return true; 2375 } 2376 EXPORT_SYMBOL_GPL(virtqueue_notify); 2377 2378 /** 2379 * virtqueue_kick - update after add_buf 2380 * @vq: the struct virtqueue 2381 * 2382 * After one or more virtqueue_add_* calls, invoke this to kick 2383 * the other side. 2384 * 2385 * Caller must ensure we don't call this with other virtqueue 2386 * operations at the same time (except where noted). 2387 * 2388 * Returns false if kick failed, otherwise true. 2389 */ 2390 bool virtqueue_kick(struct virtqueue *vq) 2391 { 2392 if (virtqueue_kick_prepare(vq)) 2393 return virtqueue_notify(vq); 2394 return true; 2395 } 2396 EXPORT_SYMBOL_GPL(virtqueue_kick); 2397 2398 /** 2399 * virtqueue_get_buf_ctx - get the next used buffer 2400 * @_vq: the struct virtqueue we're talking about. 2401 * @len: the length written into the buffer 2402 * @ctx: extra context for the token 2403 * 2404 * If the device wrote data into the buffer, @len will be set to the 2405 * amount written. This means you don't need to clear the buffer 2406 * beforehand to ensure there's no data leakage in the case of short 2407 * writes. 2408 * 2409 * Caller must ensure we don't call this with other virtqueue 2410 * operations at the same time (except where noted). 2411 * 2412 * Returns NULL if there are no used buffers, or the "data" token 2413 * handed to virtqueue_add_*(). 2414 */ 2415 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 2416 void **ctx) 2417 { 2418 struct vring_virtqueue *vq = to_vvq(_vq); 2419 2420 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 2421 virtqueue_get_buf_ctx_split(_vq, len, ctx); 2422 } 2423 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2424 2425 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2426 { 2427 return virtqueue_get_buf_ctx(_vq, len, NULL); 2428 } 2429 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2430 /** 2431 * virtqueue_disable_cb - disable callbacks 2432 * @_vq: the struct virtqueue we're talking about. 2433 * 2434 * Note that this is not necessarily synchronous, hence unreliable and only 2435 * useful as an optimization. 2436 * 2437 * Unlike other operations, this need not be serialized. 
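 *
 * As an illustration only (a sketch, not a requirement of this API), a
 * driver's completion path often pairs this with virtqueue_get_buf() and
 * virtqueue_enable_cb() along the lines of:
 *
 *	do {
 *		virtqueue_disable_cb(vq);
 *		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *			process(buf, len);
 *	} while (!virtqueue_enable_cb(vq));
 *
 * where process() is a hypothetical driver routine; virtqueue_enable_cb()
 * returning false means more buffers arrived in the meantime and the loop
 * should run again.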
2438 */ 2439 void virtqueue_disable_cb(struct virtqueue *_vq) 2440 { 2441 struct vring_virtqueue *vq = to_vvq(_vq); 2442 2443 if (vq->packed_ring) 2444 virtqueue_disable_cb_packed(_vq); 2445 else 2446 virtqueue_disable_cb_split(_vq); 2447 } 2448 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2449 2450 /** 2451 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2452 * @_vq: the struct virtqueue we're talking about. 2453 * 2454 * This re-enables callbacks; it returns current queue state 2455 * in an opaque unsigned value. This value should be later tested by 2456 * virtqueue_poll, to detect a possible race between the driver checking for 2457 * more work, and enabling callbacks. 2458 * 2459 * Caller must ensure we don't call this with other virtqueue 2460 * operations at the same time (except where noted). 2461 */ 2462 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2463 { 2464 struct vring_virtqueue *vq = to_vvq(_vq); 2465 2466 if (vq->event_triggered) 2467 vq->event_triggered = false; 2468 2469 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2470 virtqueue_enable_cb_prepare_split(_vq); 2471 } 2472 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2473 2474 /** 2475 * virtqueue_poll - query pending used buffers 2476 * @_vq: the struct virtqueue we're talking about. 2477 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2478 * 2479 * Returns "true" if there are pending used buffers in the queue. 2480 * 2481 * This does not need to be serialized. 2482 */ 2483 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2484 { 2485 struct vring_virtqueue *vq = to_vvq(_vq); 2486 2487 if (unlikely(vq->broken)) 2488 return false; 2489 2490 virtio_mb(vq->weak_barriers); 2491 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 2492 virtqueue_poll_split(_vq, last_used_idx); 2493 } 2494 EXPORT_SYMBOL_GPL(virtqueue_poll); 2495 2496 /** 2497 * virtqueue_enable_cb - restart callbacks after disable_cb. 2498 * @_vq: the struct virtqueue we're talking about. 2499 * 2500 * This re-enables callbacks; it returns "false" if there are pending 2501 * buffers in the queue, to detect a possible race between the driver 2502 * checking for more work, and enabling callbacks. 2503 * 2504 * Caller must ensure we don't call this with other virtqueue 2505 * operations at the same time (except where noted). 2506 */ 2507 bool virtqueue_enable_cb(struct virtqueue *_vq) 2508 { 2509 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2510 2511 return !virtqueue_poll(_vq, last_used_idx); 2512 } 2513 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2514 2515 /** 2516 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2517 * @_vq: the struct virtqueue we're talking about. 2518 * 2519 * This re-enables callbacks but hints to the other side to delay 2520 * interrupts until most of the available buffers have been processed; 2521 * it returns "false" if there are many pending buffers in the queue, 2522 * to detect a possible race between the driver checking for more work, 2523 * and enabling callbacks. 2524 * 2525 * Caller must ensure we don't call this with other virtqueue 2526 * operations at the same time (except where noted). 2527 */ 2528 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2529 { 2530 struct vring_virtqueue *vq = to_vvq(_vq); 2531 2532 if (vq->event_triggered) 2533 vq->event_triggered = false; 2534 2535 return vq->packed_ring ? 
virtqueue_enable_cb_delayed_packed(_vq) : 2536 virtqueue_enable_cb_delayed_split(_vq); 2537 } 2538 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2539 2540 /** 2541 * virtqueue_detach_unused_buf - detach first unused buffer 2542 * @_vq: the struct virtqueue we're talking about. 2543 * 2544 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2545 * This is not valid on an active queue; it is useful for device 2546 * shutdown or the reset queue. 2547 */ 2548 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2549 { 2550 struct vring_virtqueue *vq = to_vvq(_vq); 2551 2552 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2553 virtqueue_detach_unused_buf_split(_vq); 2554 } 2555 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2556 2557 static inline bool more_used(const struct vring_virtqueue *vq) 2558 { 2559 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2560 } 2561 2562 /** 2563 * vring_interrupt - notify a virtqueue on an interrupt 2564 * @irq: the IRQ number (ignored) 2565 * @_vq: the struct virtqueue to notify 2566 * 2567 * Calls the callback function of @_vq to process the virtqueue 2568 * notification. 2569 */ 2570 irqreturn_t vring_interrupt(int irq, void *_vq) 2571 { 2572 struct vring_virtqueue *vq = to_vvq(_vq); 2573 2574 if (!more_used(vq)) { 2575 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2576 return IRQ_NONE; 2577 } 2578 2579 if (unlikely(vq->broken)) { 2580 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2581 dev_warn_once(&vq->vq.vdev->dev, 2582 "virtio vring IRQ raised before DRIVER_OK"); 2583 return IRQ_NONE; 2584 #else 2585 return IRQ_HANDLED; 2586 #endif 2587 } 2588 2589 /* Just a hint for performance: so it's ok that this can be racy! */ 2590 if (vq->event) 2591 data_race(vq->event_triggered = true); 2592 2593 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2594 if (vq->vq.callback) 2595 vq->vq.callback(&vq->vq); 2596 2597 return IRQ_HANDLED; 2598 } 2599 EXPORT_SYMBOL_GPL(vring_interrupt); 2600 2601 /* Only available for split ring */ 2602 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 2603 struct vring_virtqueue_split *vring_split, 2604 struct virtio_device *vdev, 2605 bool weak_barriers, 2606 bool context, 2607 bool (*notify)(struct virtqueue *), 2608 void (*callback)(struct virtqueue *), 2609 const char *name, 2610 struct device *dma_dev) 2611 { 2612 struct vring_virtqueue *vq; 2613 int err; 2614 2615 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2616 return NULL; 2617 2618 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2619 if (!vq) 2620 return NULL; 2621 2622 vq->packed_ring = false; 2623 vq->vq.callback = callback; 2624 vq->vq.vdev = vdev; 2625 vq->vq.name = name; 2626 vq->vq.index = index; 2627 vq->vq.reset = false; 2628 vq->we_own_ring = false; 2629 vq->notify = notify; 2630 vq->weak_barriers = weak_barriers; 2631 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2632 vq->broken = true; 2633 #else 2634 vq->broken = false; 2635 #endif 2636 vq->dma_dev = dma_dev; 2637 vq->use_dma_api = vring_use_dma_api(vdev); 2638 vq->premapped = false; 2639 vq->do_unmap = vq->use_dma_api; 2640 2641 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2642 !context; 2643 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2644 2645 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2646 vq->weak_barriers = false; 2647 2648 err = vring_alloc_state_extra_split(vring_split); 2649 if (err) { 2650 kfree(vq); 2651 return NULL; 2652 } 2653 2654 
virtqueue_vring_init_split(vring_split, vq); 2655 2656 virtqueue_init(vq, vring_split->vring.num); 2657 virtqueue_vring_attach_split(vq, vring_split); 2658 2659 spin_lock(&vdev->vqs_list_lock); 2660 list_add_tail(&vq->vq.list, &vdev->vqs); 2661 spin_unlock(&vdev->vqs_list_lock); 2662 return &vq->vq; 2663 } 2664 2665 struct virtqueue *vring_create_virtqueue( 2666 unsigned int index, 2667 unsigned int num, 2668 unsigned int vring_align, 2669 struct virtio_device *vdev, 2670 bool weak_barriers, 2671 bool may_reduce_num, 2672 bool context, 2673 bool (*notify)(struct virtqueue *), 2674 void (*callback)(struct virtqueue *), 2675 const char *name) 2676 { 2677 2678 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2679 return vring_create_virtqueue_packed(index, num, vring_align, 2680 vdev, weak_barriers, may_reduce_num, 2681 context, notify, callback, name, vdev->dev.parent); 2682 2683 return vring_create_virtqueue_split(index, num, vring_align, 2684 vdev, weak_barriers, may_reduce_num, 2685 context, notify, callback, name, vdev->dev.parent); 2686 } 2687 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2688 2689 struct virtqueue *vring_create_virtqueue_dma( 2690 unsigned int index, 2691 unsigned int num, 2692 unsigned int vring_align, 2693 struct virtio_device *vdev, 2694 bool weak_barriers, 2695 bool may_reduce_num, 2696 bool context, 2697 bool (*notify)(struct virtqueue *), 2698 void (*callback)(struct virtqueue *), 2699 const char *name, 2700 struct device *dma_dev) 2701 { 2702 2703 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2704 return vring_create_virtqueue_packed(index, num, vring_align, 2705 vdev, weak_barriers, may_reduce_num, 2706 context, notify, callback, name, dma_dev); 2707 2708 return vring_create_virtqueue_split(index, num, vring_align, 2709 vdev, weak_barriers, may_reduce_num, 2710 context, notify, callback, name, dma_dev); 2711 } 2712 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); 2713 2714 /** 2715 * virtqueue_resize - resize the vring of vq 2716 * @_vq: the struct virtqueue we're talking about. 2717 * @num: new ring num 2718 * @recycle: callback to recycle unused buffers 2719 * 2720 * When it is really necessary to create a new vring, it will set the current vq 2721 * into the reset state. Then call the passed callback to recycle the buffer 2722 * that is no longer used. Only after the new vring is successfully created, the 2723 * old vring will be released. 2724 * 2725 * Caller must ensure we don't call this with other virtqueue operations 2726 * at the same time (except where noted). 2727 * 2728 * Returns zero or a negative error. 2729 * 0: success. 2730 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. 2731 * vq can still work normally 2732 * -EBUSY: Failed to sync with device, vq may not work properly 2733 * -ENOENT: Transport or device not supported 2734 * -E2BIG/-EINVAL: num error 2735 * -EPERM: Operation not permitted 2736 * 2737 */ 2738 int virtqueue_resize(struct virtqueue *_vq, u32 num, 2739 void (*recycle)(struct virtqueue *vq, void *buf)) 2740 { 2741 struct vring_virtqueue *vq = to_vvq(_vq); 2742 int err; 2743 2744 if (num > vq->vq.num_max) 2745 return -E2BIG; 2746 2747 if (!num) 2748 return -EINVAL; 2749 2750 if ((vq->packed_ring ? 
vq->packed.vring.num : vq->split.vring.num) == num)
		return 0;

	err = virtqueue_disable_and_recycle(_vq, recycle);
	if (err)
		return err;

	if (vq->packed_ring)
		err = virtqueue_resize_packed(_vq, num);
	else
		err = virtqueue_resize_split(_vq, num);

	return virtqueue_enable_after_reset(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_resize);

/**
 * virtqueue_set_dma_premapped - set the vring premapped mode
 * @_vq: the struct virtqueue we're talking about.
 *
 * Enable the premapped mode of the vq.
 *
 * The vring in premapped mode does not do DMA internally, so the driver must
 * do the DMA mapping in advance and pass the DMA address through the
 * dma_address field of the scatterlist. When the driver gets a used buffer
 * back from the vring, it also has to unmap that DMA address itself.
 *
 * This function must be called immediately after creating the vq, or after vq
 * reset, and before adding any buffers to it.
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error.
 * 0: success.
 * -EINVAL: too late to enable premapped mode, the vq already contains buffers.
 */
int virtqueue_set_dma_premapped(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u32 num;

	START_USE(vq);

	num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;

	if (num != vq->vq.num_free) {
		END_USE(vq);
		return -EINVAL;
	}

	vq->premapped = true;
	vq->do_unmap = false;

	END_USE(vq);

	return 0;
}
EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);

/**
 * virtqueue_reset - detach and recycle all unused buffers
 * @_vq: the struct virtqueue we're talking about.
 * @recycle: callback to recycle unused buffers
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error.
 * 0: success.
2820 * -EBUSY: Failed to sync with device, vq may not work properly 2821 * -ENOENT: Transport or device not supported 2822 * -EPERM: Operation not permitted 2823 */ 2824 int virtqueue_reset(struct virtqueue *_vq, 2825 void (*recycle)(struct virtqueue *vq, void *buf)) 2826 { 2827 struct vring_virtqueue *vq = to_vvq(_vq); 2828 int err; 2829 2830 err = virtqueue_disable_and_recycle(_vq, recycle); 2831 if (err) 2832 return err; 2833 2834 if (vq->packed_ring) 2835 virtqueue_reinit_packed(vq); 2836 else 2837 virtqueue_reinit_split(vq); 2838 2839 return virtqueue_enable_after_reset(_vq); 2840 } 2841 EXPORT_SYMBOL_GPL(virtqueue_reset); 2842 2843 /* Only available for split ring */ 2844 struct virtqueue *vring_new_virtqueue(unsigned int index, 2845 unsigned int num, 2846 unsigned int vring_align, 2847 struct virtio_device *vdev, 2848 bool weak_barriers, 2849 bool context, 2850 void *pages, 2851 bool (*notify)(struct virtqueue *vq), 2852 void (*callback)(struct virtqueue *vq), 2853 const char *name) 2854 { 2855 struct vring_virtqueue_split vring_split = {}; 2856 2857 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2858 return NULL; 2859 2860 vring_init(&vring_split.vring, num, pages, vring_align); 2861 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 2862 context, notify, callback, name, 2863 vdev->dev.parent); 2864 } 2865 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2866 2867 static void vring_free(struct virtqueue *_vq) 2868 { 2869 struct vring_virtqueue *vq = to_vvq(_vq); 2870 2871 if (vq->we_own_ring) { 2872 if (vq->packed_ring) { 2873 vring_free_queue(vq->vq.vdev, 2874 vq->packed.ring_size_in_bytes, 2875 vq->packed.vring.desc, 2876 vq->packed.ring_dma_addr, 2877 vring_dma_dev(vq)); 2878 2879 vring_free_queue(vq->vq.vdev, 2880 vq->packed.event_size_in_bytes, 2881 vq->packed.vring.driver, 2882 vq->packed.driver_event_dma_addr, 2883 vring_dma_dev(vq)); 2884 2885 vring_free_queue(vq->vq.vdev, 2886 vq->packed.event_size_in_bytes, 2887 vq->packed.vring.device, 2888 vq->packed.device_event_dma_addr, 2889 vring_dma_dev(vq)); 2890 2891 kfree(vq->packed.desc_state); 2892 kfree(vq->packed.desc_extra); 2893 } else { 2894 vring_free_queue(vq->vq.vdev, 2895 vq->split.queue_size_in_bytes, 2896 vq->split.vring.desc, 2897 vq->split.queue_dma_addr, 2898 vring_dma_dev(vq)); 2899 } 2900 } 2901 if (!vq->packed_ring) { 2902 kfree(vq->split.desc_state); 2903 kfree(vq->split.desc_extra); 2904 } 2905 } 2906 2907 void vring_del_virtqueue(struct virtqueue *_vq) 2908 { 2909 struct vring_virtqueue *vq = to_vvq(_vq); 2910 2911 spin_lock(&vq->vq.vdev->vqs_list_lock); 2912 list_del(&_vq->list); 2913 spin_unlock(&vq->vq.vdev->vqs_list_lock); 2914 2915 vring_free(_vq); 2916 2917 kfree(vq); 2918 } 2919 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2920 2921 u32 vring_notification_data(struct virtqueue *_vq) 2922 { 2923 struct vring_virtqueue *vq = to_vvq(_vq); 2924 u16 next; 2925 2926 if (vq->packed_ring) 2927 next = (vq->packed.next_avail_idx & 2928 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | 2929 vq->packed.avail_wrap_counter << 2930 VRING_PACKED_EVENT_F_WRAP_CTR; 2931 else 2932 next = vq->split.avail_idx_shadow; 2933 2934 return next << 16 | _vq->index; 2935 } 2936 EXPORT_SYMBOL_GPL(vring_notification_data); 2937 2938 /* Manipulates transport-specific feature bits. 
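 *
 * (Illustrative note: a transport's ->finalize_features() hook is expected
 *  to call
 *
 *	vring_transport_features(vdev);
 *
 *  before committing the negotiated features, so that ring-related
 *  transport bits this file does not understand are cleared rather than
 *  silently accepted.)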
*/ 2939 void vring_transport_features(struct virtio_device *vdev) 2940 { 2941 unsigned int i; 2942 2943 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2944 switch (i) { 2945 case VIRTIO_RING_F_INDIRECT_DESC: 2946 break; 2947 case VIRTIO_RING_F_EVENT_IDX: 2948 break; 2949 case VIRTIO_F_VERSION_1: 2950 break; 2951 case VIRTIO_F_ACCESS_PLATFORM: 2952 break; 2953 case VIRTIO_F_RING_PACKED: 2954 break; 2955 case VIRTIO_F_ORDER_PLATFORM: 2956 break; 2957 case VIRTIO_F_NOTIFICATION_DATA: 2958 break; 2959 default: 2960 /* We don't understand this bit. */ 2961 __virtio_clear_bit(vdev, i); 2962 } 2963 } 2964 } 2965 EXPORT_SYMBOL_GPL(vring_transport_features); 2966 2967 /** 2968 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2969 * @_vq: the struct virtqueue containing the vring of interest. 2970 * 2971 * Returns the size of the vring. This is mainly used for boasting to 2972 * userspace. Unlike other operations, this need not be serialized. 2973 */ 2974 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) 2975 { 2976 2977 const struct vring_virtqueue *vq = to_vvq(_vq); 2978 2979 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2980 } 2981 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2982 2983 /* 2984 * This function should only be called by the core, not directly by the driver. 2985 */ 2986 void __virtqueue_break(struct virtqueue *_vq) 2987 { 2988 struct vring_virtqueue *vq = to_vvq(_vq); 2989 2990 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2991 WRITE_ONCE(vq->broken, true); 2992 } 2993 EXPORT_SYMBOL_GPL(__virtqueue_break); 2994 2995 /* 2996 * This function should only be called by the core, not directly by the driver. 2997 */ 2998 void __virtqueue_unbreak(struct virtqueue *_vq) 2999 { 3000 struct vring_virtqueue *vq = to_vvq(_vq); 3001 3002 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3003 WRITE_ONCE(vq->broken, false); 3004 } 3005 EXPORT_SYMBOL_GPL(__virtqueue_unbreak); 3006 3007 bool virtqueue_is_broken(const struct virtqueue *_vq) 3008 { 3009 const struct vring_virtqueue *vq = to_vvq(_vq); 3010 3011 return READ_ONCE(vq->broken); 3012 } 3013 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 3014 3015 /* 3016 * This should prevent the device from being used, allowing drivers to 3017 * recover. You may need to grab appropriate locks to flush. 3018 */ 3019 void virtio_break_device(struct virtio_device *dev) 3020 { 3021 struct virtqueue *_vq; 3022 3023 spin_lock(&dev->vqs_list_lock); 3024 list_for_each_entry(_vq, &dev->vqs, list) { 3025 struct vring_virtqueue *vq = to_vvq(_vq); 3026 3027 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 3028 WRITE_ONCE(vq->broken, true); 3029 } 3030 spin_unlock(&dev->vqs_list_lock); 3031 } 3032 EXPORT_SYMBOL_GPL(virtio_break_device); 3033 3034 /* 3035 * This should allow the device to be used by the driver. You may 3036 * need to grab appropriate locks to flush the write to 3037 * vq->broken. This should only be used in some specific case e.g 3038 * (probing and restoring). This function should only be called by the 3039 * core, not directly by the driver. 3040 */ 3041 void __virtio_unbreak_device(struct virtio_device *dev) 3042 { 3043 struct virtqueue *_vq; 3044 3045 spin_lock(&dev->vqs_list_lock); 3046 list_for_each_entry(_vq, &dev->vqs, list) { 3047 struct vring_virtqueue *vq = to_vvq(_vq); 3048 3049 /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
 */
		WRITE_ONCE(vq->broken, false);
	}
	spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(__virtio_unbreak_device);

dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

/**
 * virtqueue_dma_map_single_attrs - map DMA for _vq
 * @_vq: the struct virtqueue we're talking about.
 * @ptr: the pointer to the buffer to map
 * @size: the size of the buffer to map
 * @dir: DMA direction
 * @attrs: DMA attributes
 *
 * The caller calls this to do the DMA mapping in advance. The resulting DMA
 * address can then be passed to this _vq when it is in premapped mode.
 *
 * Returns the DMA address. The caller should check it with
 * virtqueue_dma_mapping_error().
 */
dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr,
					  size_t size,
					  enum dma_data_direction dir,
					  unsigned long attrs)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!vq->use_dma_api) {
		kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir);
		return (dma_addr_t)virt_to_phys(ptr);
	}

	return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs);
}
EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs);

/**
 * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq
 * @_vq: the struct virtqueue we're talking about.
 * @addr: the dma address to unmap
 * @size: the size of the buffer
 * @dir: DMA direction
 * @attrs: DMA attributes
 *
 * Unmap an address that was mapped by the virtqueue_dma_map_* APIs.
 */
void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr,
				      size_t size, enum dma_data_direction dir,
				      unsigned long attrs)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!vq->use_dma_api)
		return;

	dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs);
}
EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs);

/**
 * virtqueue_dma_mapping_error - check a DMA address
 * @_vq: the struct virtqueue we're talking about.
 * @addr: DMA address
 *
 * Returns 0 if the DMA address is valid; any other value means the mapping
 * failed and the address must not be used.
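 *
 * Illustrative sketch only, assuming the vq was switched to premapped mode
 * with virtqueue_set_dma_premapped(); "buf" and "len" are hypothetical and
 * error handling is trimmed:
 *
 *	struct scatterlist sg;
 *	dma_addr_t addr;
 *
 *	addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
 *	if (virtqueue_dma_mapping_error(vq, addr))
 *		return -ENOMEM;
 *
 *	sg_init_one(&sg, buf, len);
 *	sg.dma_address = addr;
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);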
3163 */ 3164 int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) 3165 { 3166 struct vring_virtqueue *vq = to_vvq(_vq); 3167 3168 if (!vq->use_dma_api) 3169 return 0; 3170 3171 return dma_mapping_error(vring_dma_dev(vq), addr); 3172 } 3173 EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); 3174 3175 /** 3176 * virtqueue_dma_need_sync - check a dma address needs sync 3177 * @_vq: the struct virtqueue we're talking about. 3178 * @addr: DMA address 3179 * 3180 * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be 3181 * synchronized 3182 * 3183 * return bool 3184 */ 3185 bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) 3186 { 3187 struct vring_virtqueue *vq = to_vvq(_vq); 3188 3189 if (!vq->use_dma_api) 3190 return false; 3191 3192 return dma_need_sync(vring_dma_dev(vq), addr); 3193 } 3194 EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); 3195 3196 /** 3197 * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu 3198 * @_vq: the struct virtqueue we're talking about. 3199 * @addr: DMA address 3200 * @offset: DMA address offset 3201 * @size: buf size for sync 3202 * @dir: DMA direction 3203 * 3204 * Before calling this function, use virtqueue_dma_need_sync() to confirm that 3205 * the DMA address really needs to be synchronized 3206 * 3207 */ 3208 void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, 3209 dma_addr_t addr, 3210 unsigned long offset, size_t size, 3211 enum dma_data_direction dir) 3212 { 3213 struct vring_virtqueue *vq = to_vvq(_vq); 3214 struct device *dev = vring_dma_dev(vq); 3215 3216 if (!vq->use_dma_api) 3217 return; 3218 3219 dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); 3220 } 3221 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); 3222 3223 /** 3224 * virtqueue_dma_sync_single_range_for_device - dma sync for device 3225 * @_vq: the struct virtqueue we're talking about. 3226 * @addr: DMA address 3227 * @offset: DMA address offset 3228 * @size: buf size for sync 3229 * @dir: DMA direction 3230 * 3231 * Before calling this function, use virtqueue_dma_need_sync() to confirm that 3232 * the DMA address really needs to be synchronized 3233 */ 3234 void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, 3235 dma_addr_t addr, 3236 unsigned long offset, size_t size, 3237 enum dma_data_direction dir) 3238 { 3239 struct vring_virtqueue *vq = to_vvq(_vq); 3240 struct device *dev = vring_dma_dev(vq); 3241 3242 if (!vq->use_dma_api) 3243 return; 3244 3245 dma_sync_single_range_for_device(dev, addr, offset, size, dir); 3246 } 3247 EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); 3248 3249 MODULE_DESCRIPTION("Virtio ring implementation"); 3250 MODULE_LICENSE("GPL"); 3251
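
/*
 * Illustrative sketch only (not part of the original file): once a
 * premapped, device-writable buffer comes back from virtqueue_get_buf(),
 * the driver is expected to sync it for the CPU before reading it and to
 * unmap it when done, e.g.
 *
 *	buf = virtqueue_get_buf(vq, &len);
 *	if (buf) {
 *		if (virtqueue_dma_need_sync(vq, addr))
 *			virtqueue_dma_sync_single_range_for_cpu(vq, addr, 0, len,
 *								DMA_FROM_DEVICE);
 *		consume(buf, len);
 *		virtqueue_dma_unmap_single_attrs(vq, addr, size,
 *						 DMA_FROM_DEVICE, 0);
 *	}
 *
 * where "addr" and "size" are the DMA address and mapped size the driver
 * saved when it mapped "buf" with virtqueue_dma_map_single_attrs(), and
 * consume() is a hypothetical driver routine.
 */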