1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/kmsan.h> 15 #include <linux/spinlock.h> 16 #include <xen/xen.h> 17 18 #ifdef DEBUG 19 /* For development, we want to crash whenever the ring is screwed. */ 20 #define BAD_RING(_vq, fmt, args...) \ 21 do { \ 22 dev_err(&(_vq)->vq.vdev->dev, \ 23 "%s:"fmt, (_vq)->vq.name, ##args); \ 24 BUG(); \ 25 } while (0) 26 /* Caller is supposed to guarantee no reentry. */ 27 #define START_USE(_vq) \ 28 do { \ 29 if ((_vq)->in_use) \ 30 panic("%s:in_use = %i\n", \ 31 (_vq)->vq.name, (_vq)->in_use); \ 32 (_vq)->in_use = __LINE__; \ 33 } while (0) 34 #define END_USE(_vq) \ 35 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 36 #define LAST_ADD_TIME_UPDATE(_vq) \ 37 do { \ 38 ktime_t now = ktime_get(); \ 39 \ 40 /* No kick or get, with .1 second between? Warn. */ \ 41 if ((_vq)->last_add_time_valid) \ 42 WARN_ON(ktime_to_ms(ktime_sub(now, \ 43 (_vq)->last_add_time)) > 100); \ 44 (_vq)->last_add_time = now; \ 45 (_vq)->last_add_time_valid = true; \ 46 } while (0) 47 #define LAST_ADD_TIME_CHECK(_vq) \ 48 do { \ 49 if ((_vq)->last_add_time_valid) { \ 50 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 51 (_vq)->last_add_time)) > 100); \ 52 } \ 53 } while (0) 54 #define LAST_ADD_TIME_INVALID(_vq) \ 55 ((_vq)->last_add_time_valid = false) 56 #else 57 #define BAD_RING(_vq, fmt, args...) \ 58 do { \ 59 dev_err(&_vq->vq.vdev->dev, \ 60 "%s:"fmt, (_vq)->vq.name, ##args); \ 61 (_vq)->broken = true; \ 62 } while (0) 63 #define START_USE(vq) 64 #define END_USE(vq) 65 #define LAST_ADD_TIME_UPDATE(vq) 66 #define LAST_ADD_TIME_CHECK(vq) 67 #define LAST_ADD_TIME_INVALID(vq) 68 #endif 69 70 struct vring_desc_state_split { 71 void *data; /* Data for callback. */ 72 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 73 }; 74 75 struct vring_desc_state_packed { 76 void *data; /* Data for callback. */ 77 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 78 u16 num; /* Descriptor list length. */ 79 u16 last; /* The last desc state in a list. */ 80 }; 81 82 struct vring_desc_extra { 83 dma_addr_t addr; /* Descriptor DMA addr. */ 84 u32 len; /* Descriptor length. */ 85 u16 flags; /* Descriptor flags. */ 86 u16 next; /* The next desc state in a list. */ 87 }; 88 89 struct vring_virtqueue_split { 90 /* Actual memory layout for this queue. */ 91 struct vring vring; 92 93 /* Last written value to avail->flags */ 94 u16 avail_flags_shadow; 95 96 /* 97 * Last written value to avail->idx in 98 * guest byte order. 99 */ 100 u16 avail_idx_shadow; 101 102 /* Per-descriptor state. */ 103 struct vring_desc_state_split *desc_state; 104 struct vring_desc_extra *desc_extra; 105 106 /* DMA address and size information */ 107 dma_addr_t queue_dma_addr; 108 size_t queue_size_in_bytes; 109 110 /* 111 * The parameters for creating vrings are reserved for creating new 112 * vring. 113 */ 114 u32 vring_align; 115 bool may_reduce_num; 116 }; 117 118 struct vring_virtqueue_packed { 119 /* Actual memory layout for this queue. 
*/ 120 struct { 121 unsigned int num; 122 struct vring_packed_desc *desc; 123 struct vring_packed_desc_event *driver; 124 struct vring_packed_desc_event *device; 125 } vring; 126 127 /* Driver ring wrap counter. */ 128 bool avail_wrap_counter; 129 130 /* Avail used flags. */ 131 u16 avail_used_flags; 132 133 /* Index of the next avail descriptor. */ 134 u16 next_avail_idx; 135 136 /* 137 * Last written value to driver->flags in 138 * guest byte order. 139 */ 140 u16 event_flags_shadow; 141 142 /* Per-descriptor state. */ 143 struct vring_desc_state_packed *desc_state; 144 struct vring_desc_extra *desc_extra; 145 146 /* DMA address and size information */ 147 dma_addr_t ring_dma_addr; 148 dma_addr_t driver_event_dma_addr; 149 dma_addr_t device_event_dma_addr; 150 size_t ring_size_in_bytes; 151 size_t event_size_in_bytes; 152 }; 153 154 struct vring_virtqueue { 155 struct virtqueue vq; 156 157 /* Is this a packed ring? */ 158 bool packed_ring; 159 160 /* Is DMA API used? */ 161 bool use_dma_api; 162 163 /* Can we use weak barriers? */ 164 bool weak_barriers; 165 166 /* Other side has made a mess, don't try any more. */ 167 bool broken; 168 169 /* Host supports indirect buffers */ 170 bool indirect; 171 172 /* Host publishes avail event idx */ 173 bool event; 174 175 /* Head of free buffer list. */ 176 unsigned int free_head; 177 /* Number we've added since last sync. */ 178 unsigned int num_added; 179 180 /* Last used index we've seen. 181 * for split ring, it just contains last used index 182 * for packed ring: 183 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. 184 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. 185 */ 186 u16 last_used_idx; 187 188 /* Hint for event idx: already triggered no need to disable. */ 189 bool event_triggered; 190 191 union { 192 /* Available for split ring */ 193 struct vring_virtqueue_split split; 194 195 /* Available for packed ring */ 196 struct vring_virtqueue_packed packed; 197 }; 198 199 /* How to notify other side. FIXME: commonalize hcalls! */ 200 bool (*notify)(struct virtqueue *vq); 201 202 /* DMA, allocation, and size information */ 203 bool we_own_ring; 204 205 /* Device used for doing DMA */ 206 struct device *dma_dev; 207 208 #ifdef DEBUG 209 /* They're supposed to lock for us. */ 210 unsigned int in_use; 211 212 /* Figure out if their kicks are too delayed. */ 213 bool last_add_time_valid; 214 ktime_t last_add_time; 215 #endif 216 }; 217 218 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 219 struct vring_virtqueue_split *vring_split, 220 struct virtio_device *vdev, 221 bool weak_barriers, 222 bool context, 223 bool (*notify)(struct virtqueue *), 224 void (*callback)(struct virtqueue *), 225 const char *name, 226 struct device *dma_dev); 227 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); 228 static void vring_free(struct virtqueue *_vq); 229 230 /* 231 * Helpers. 232 */ 233 234 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) 235 236 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, 237 unsigned int total_sg) 238 { 239 /* 240 * If the host supports indirect descriptor tables, and we have multiple 241 * buffers, then go indirect. FIXME: tune this threshold 242 */ 243 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 244 } 245 246 /* 247 * Modern virtio devices have feature bits to specify whether they need a 248 * quirk and bypass the IOMMU. If not there, just use the DMA API. 
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
 */

static bool vring_use_dma_api(const struct virtio_device *vdev)
{
	if (!virtio_has_dma_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(const struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(vdev->dev.parent);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag,
			       struct device *dma_dev)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(dma_dev, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle,
			     struct device *dma_dev)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(dma_dev, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.
351 */ 352 static struct device *vring_dma_dev(const struct vring_virtqueue *vq) 353 { 354 return vq->dma_dev; 355 } 356 357 /* Map one sg entry. */ 358 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 359 struct scatterlist *sg, 360 enum dma_data_direction direction) 361 { 362 if (!vq->use_dma_api) { 363 /* 364 * If DMA is not used, KMSAN doesn't know that the scatterlist 365 * is initialized by the hardware. Explicitly check/unpoison it 366 * depending on the direction. 367 */ 368 kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); 369 return (dma_addr_t)sg_phys(sg); 370 } 371 372 /* 373 * We can't use dma_map_sg, because we don't use scatterlists in 374 * the way it expects (we don't guarantee that the scatterlist 375 * will exist for the lifetime of the mapping). 376 */ 377 return dma_map_page(vring_dma_dev(vq), 378 sg_page(sg), sg->offset, sg->length, 379 direction); 380 } 381 382 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 383 void *cpu_addr, size_t size, 384 enum dma_data_direction direction) 385 { 386 if (!vq->use_dma_api) 387 return (dma_addr_t)virt_to_phys(cpu_addr); 388 389 return dma_map_single(vring_dma_dev(vq), 390 cpu_addr, size, direction); 391 } 392 393 static int vring_mapping_error(const struct vring_virtqueue *vq, 394 dma_addr_t addr) 395 { 396 if (!vq->use_dma_api) 397 return 0; 398 399 return dma_mapping_error(vring_dma_dev(vq), addr); 400 } 401 402 static void virtqueue_init(struct vring_virtqueue *vq, u32 num) 403 { 404 vq->vq.num_free = num; 405 406 if (vq->packed_ring) 407 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 408 else 409 vq->last_used_idx = 0; 410 411 vq->event_triggered = false; 412 vq->num_added = 0; 413 414 #ifdef DEBUG 415 vq->in_use = false; 416 vq->last_add_time_valid = false; 417 #endif 418 } 419 420 421 /* 422 * Split ring specific functions - *_split(). 423 */ 424 425 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 426 const struct vring_desc *desc) 427 { 428 u16 flags; 429 430 if (!vq->use_dma_api) 431 return; 432 433 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 434 435 dma_unmap_page(vring_dma_dev(vq), 436 virtio64_to_cpu(vq->vq.vdev, desc->addr), 437 virtio32_to_cpu(vq->vq.vdev, desc->len), 438 (flags & VRING_DESC_F_WRITE) ? 439 DMA_FROM_DEVICE : DMA_TO_DEVICE); 440 } 441 442 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 443 unsigned int i) 444 { 445 struct vring_desc_extra *extra = vq->split.desc_extra; 446 u16 flags; 447 448 if (!vq->use_dma_api) 449 goto out; 450 451 flags = extra[i].flags; 452 453 if (flags & VRING_DESC_F_INDIRECT) { 454 dma_unmap_single(vring_dma_dev(vq), 455 extra[i].addr, 456 extra[i].len, 457 (flags & VRING_DESC_F_WRITE) ? 458 DMA_FROM_DEVICE : DMA_TO_DEVICE); 459 } else { 460 dma_unmap_page(vring_dma_dev(vq), 461 extra[i].addr, 462 extra[i].len, 463 (flags & VRING_DESC_F_WRITE) ? 464 DMA_FROM_DEVICE : DMA_TO_DEVICE); 465 } 466 467 out: 468 return extra[i].next; 469 } 470 471 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 472 unsigned int total_sg, 473 gfp_t gfp) 474 { 475 struct vring_desc *desc; 476 unsigned int i; 477 478 /* 479 * We require lowmem mappings for the descriptors because 480 * otherwise virt_to_phys will give us bogus addresses in the 481 * virtqueue. 
 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, prev, err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mapping.
613 */ 614 i = virtqueue_add_desc_split(_vq, desc, i, addr, 615 sg->length, 616 VRING_DESC_F_NEXT | 617 VRING_DESC_F_WRITE, 618 indirect); 619 } 620 } 621 /* Last one doesn't continue. */ 622 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 623 if (!indirect && vq->use_dma_api) 624 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 625 ~VRING_DESC_F_NEXT; 626 627 if (indirect) { 628 /* Now that the indirect table is filled in, map it. */ 629 dma_addr_t addr = vring_map_single( 630 vq, desc, total_sg * sizeof(struct vring_desc), 631 DMA_TO_DEVICE); 632 if (vring_mapping_error(vq, addr)) 633 goto unmap_release; 634 635 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 636 head, addr, 637 total_sg * sizeof(struct vring_desc), 638 VRING_DESC_F_INDIRECT, 639 false); 640 } 641 642 /* We're using some buffers from the free list. */ 643 vq->vq.num_free -= descs_used; 644 645 /* Update free pointer */ 646 if (indirect) 647 vq->free_head = vq->split.desc_extra[head].next; 648 else 649 vq->free_head = i; 650 651 /* Store token and indirect buffer state. */ 652 vq->split.desc_state[head].data = data; 653 if (indirect) 654 vq->split.desc_state[head].indir_desc = desc; 655 else 656 vq->split.desc_state[head].indir_desc = ctx; 657 658 /* Put entry in available array (but don't update avail->idx until they 659 * do sync). */ 660 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 661 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 662 663 /* Descriptors and available array need to be set before we expose the 664 * new available array entries. */ 665 virtio_wmb(vq->weak_barriers); 666 vq->split.avail_idx_shadow++; 667 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 668 vq->split.avail_idx_shadow); 669 vq->num_added++; 670 671 pr_debug("Added buffer head %i to %p\n", head, vq); 672 END_USE(vq); 673 674 /* This is very unlikely, but theoretically possible. Kick 675 * just in case. */ 676 if (unlikely(vq->num_added == (1 << 16) - 1)) 677 virtqueue_kick(_vq); 678 679 return 0; 680 681 unmap_release: 682 err_idx = i; 683 684 if (indirect) 685 i = 0; 686 else 687 i = head; 688 689 for (n = 0; n < total_sg; n++) { 690 if (i == err_idx) 691 break; 692 if (indirect) { 693 vring_unmap_one_split_indirect(vq, &desc[i]); 694 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 695 } else 696 i = vring_unmap_one_split(vq, i); 697 } 698 699 if (indirect) 700 kfree(desc); 701 702 END_USE(vq); 703 return -ENOMEM; 704 } 705 706 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 707 { 708 struct vring_virtqueue *vq = to_vvq(_vq); 709 u16 new, old; 710 bool needs_kick; 711 712 START_USE(vq); 713 /* We need to expose available array entries before checking avail 714 * event. */ 715 virtio_mb(vq->weak_barriers); 716 717 old = vq->split.avail_idx_shadow - vq->num_added; 718 new = vq->split.avail_idx_shadow; 719 vq->num_added = 0; 720 721 LAST_ADD_TIME_CHECK(vq); 722 LAST_ADD_TIME_INVALID(vq); 723 724 if (vq->event) { 725 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 726 vring_avail_event(&vq->split.vring)), 727 new, old); 728 } else { 729 needs_kick = !(vq->split.vring.used->flags & 730 cpu_to_virtio16(_vq->vdev, 731 VRING_USED_F_NO_NOTIFY)); 732 } 733 END_USE(vq); 734 return needs_kick; 735 } 736 737 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 738 void **ctx) 739 { 740 unsigned int i, j; 741 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 742 743 /* Clear data ptr. 
static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
			     void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->split.desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, i);
		i = vq->split.desc_extra[i].next;
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, i);
	vq->split.desc_extra[i].next = vq->free_head;
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc =
				vq->split.desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = vq->split.desc_extra[head].len;

		BUG_ON(!(vq->split.desc_extra[head].flags &
				VRING_DESC_F_INDIRECT));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split_indirect(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->split.desc_state[head].indir_desc;
	}
}

static bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
			vq->split.vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].len);

	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
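
/*
 * Illustrative sketch only (not built, helper name hypothetical): the
 * driver-side consumer of the helpers above.  A callback typically drains
 * the queue by calling the public virtqueue_get_buf() wrapper until it
 * returns NULL, getting back the token passed to virtqueue_add_*().
 */
#if 0
static void example_done(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	while ((buf = virtqueue_get_buf(vq, &len)) != NULL) {
		/* "len" is the number of bytes the device wrote into an in buffer. */
		example_consume(buf, len);	/* hypothetical driver helper */
	}
}
#endif
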
static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

		/*
		 * If device triggered an event already it won't trigger one again:
		 * no need to disable.
		 */
		if (vq->event_triggered)
			return;

		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
}

static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->split.vring.used->idx);
}

static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}

static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
				       struct vring_virtqueue *vq)
{
	struct virtio_device *vdev;

	vdev = vq->vq.vdev;

	vring_split->avail_flags_shadow = 0;
	vring_split->avail_idx_shadow = 0;

	/* No callback?
Tell other side not to bother us. */ 982 if (!vq->vq.callback) { 983 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 984 if (!vq->event) 985 vring_split->vring.avail->flags = cpu_to_virtio16(vdev, 986 vring_split->avail_flags_shadow); 987 } 988 } 989 990 static void virtqueue_reinit_split(struct vring_virtqueue *vq) 991 { 992 int num; 993 994 num = vq->split.vring.num; 995 996 vq->split.vring.avail->flags = 0; 997 vq->split.vring.avail->idx = 0; 998 999 /* reset avail event */ 1000 vq->split.vring.avail->ring[num] = 0; 1001 1002 vq->split.vring.used->flags = 0; 1003 vq->split.vring.used->idx = 0; 1004 1005 /* reset used event */ 1006 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; 1007 1008 virtqueue_init(vq, num); 1009 1010 virtqueue_vring_init_split(&vq->split, vq); 1011 } 1012 1013 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, 1014 struct vring_virtqueue_split *vring_split) 1015 { 1016 vq->split = *vring_split; 1017 1018 /* Put everything in free lists. */ 1019 vq->free_head = 0; 1020 } 1021 1022 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) 1023 { 1024 struct vring_desc_state_split *state; 1025 struct vring_desc_extra *extra; 1026 u32 num = vring_split->vring.num; 1027 1028 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); 1029 if (!state) 1030 goto err_state; 1031 1032 extra = vring_alloc_desc_extra(num); 1033 if (!extra) 1034 goto err_extra; 1035 1036 memset(state, 0, num * sizeof(struct vring_desc_state_split)); 1037 1038 vring_split->desc_state = state; 1039 vring_split->desc_extra = extra; 1040 return 0; 1041 1042 err_extra: 1043 kfree(state); 1044 err_state: 1045 return -ENOMEM; 1046 } 1047 1048 static void vring_free_split(struct vring_virtqueue_split *vring_split, 1049 struct virtio_device *vdev, struct device *dma_dev) 1050 { 1051 vring_free_queue(vdev, vring_split->queue_size_in_bytes, 1052 vring_split->vring.desc, 1053 vring_split->queue_dma_addr, 1054 dma_dev); 1055 1056 kfree(vring_split->desc_state); 1057 kfree(vring_split->desc_extra); 1058 } 1059 1060 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, 1061 struct virtio_device *vdev, 1062 u32 num, 1063 unsigned int vring_align, 1064 bool may_reduce_num, 1065 struct device *dma_dev) 1066 { 1067 void *queue = NULL; 1068 dma_addr_t dma_addr; 1069 1070 /* We assume num is a power of 2. */ 1071 if (!is_power_of_2(num)) { 1072 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1073 return -EINVAL; 1074 } 1075 1076 /* TODO: allocate each queue chunk individually */ 1077 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1078 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1079 &dma_addr, 1080 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1081 dma_dev); 1082 if (queue) 1083 break; 1084 if (!may_reduce_num) 1085 return -ENOMEM; 1086 } 1087 1088 if (!num) 1089 return -ENOMEM; 1090 1091 if (!queue) { 1092 /* Try to get a single page. You are my only hope! 
*/ 1093 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1094 &dma_addr, GFP_KERNEL | __GFP_ZERO, 1095 dma_dev); 1096 } 1097 if (!queue) 1098 return -ENOMEM; 1099 1100 vring_init(&vring_split->vring, num, queue, vring_align); 1101 1102 vring_split->queue_dma_addr = dma_addr; 1103 vring_split->queue_size_in_bytes = vring_size(num, vring_align); 1104 1105 vring_split->vring_align = vring_align; 1106 vring_split->may_reduce_num = may_reduce_num; 1107 1108 return 0; 1109 } 1110 1111 static struct virtqueue *vring_create_virtqueue_split( 1112 unsigned int index, 1113 unsigned int num, 1114 unsigned int vring_align, 1115 struct virtio_device *vdev, 1116 bool weak_barriers, 1117 bool may_reduce_num, 1118 bool context, 1119 bool (*notify)(struct virtqueue *), 1120 void (*callback)(struct virtqueue *), 1121 const char *name, 1122 struct device *dma_dev) 1123 { 1124 struct vring_virtqueue_split vring_split = {}; 1125 struct virtqueue *vq; 1126 int err; 1127 1128 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, 1129 may_reduce_num, dma_dev); 1130 if (err) 1131 return NULL; 1132 1133 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 1134 context, notify, callback, name, dma_dev); 1135 if (!vq) { 1136 vring_free_split(&vring_split, vdev, dma_dev); 1137 return NULL; 1138 } 1139 1140 to_vvq(vq)->we_own_ring = true; 1141 1142 return vq; 1143 } 1144 1145 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) 1146 { 1147 struct vring_virtqueue_split vring_split = {}; 1148 struct vring_virtqueue *vq = to_vvq(_vq); 1149 struct virtio_device *vdev = _vq->vdev; 1150 int err; 1151 1152 err = vring_alloc_queue_split(&vring_split, vdev, num, 1153 vq->split.vring_align, 1154 vq->split.may_reduce_num, 1155 vring_dma_dev(vq)); 1156 if (err) 1157 goto err; 1158 1159 err = vring_alloc_state_extra_split(&vring_split); 1160 if (err) 1161 goto err_state_extra; 1162 1163 vring_free(&vq->vq); 1164 1165 virtqueue_vring_init_split(&vring_split, vq); 1166 1167 virtqueue_init(vq, vring_split.vring.num); 1168 virtqueue_vring_attach_split(vq, &vring_split); 1169 1170 return 0; 1171 1172 err_state_extra: 1173 vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); 1174 err: 1175 virtqueue_reinit_split(vq); 1176 return -ENOMEM; 1177 } 1178 1179 1180 /* 1181 * Packed ring specific functions - *_packed(). 1182 */ 1183 static bool packed_used_wrap_counter(u16 last_used_idx) 1184 { 1185 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1186 } 1187 1188 static u16 packed_last_used(u16 last_used_idx) 1189 { 1190 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1191 } 1192 1193 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1194 const struct vring_desc_extra *extra) 1195 { 1196 u16 flags; 1197 1198 if (!vq->use_dma_api) 1199 return; 1200 1201 flags = extra->flags; 1202 1203 if (flags & VRING_DESC_F_INDIRECT) { 1204 dma_unmap_single(vring_dma_dev(vq), 1205 extra->addr, extra->len, 1206 (flags & VRING_DESC_F_WRITE) ? 1207 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1208 } else { 1209 dma_unmap_page(vring_dma_dev(vq), 1210 extra->addr, extra->len, 1211 (flags & VRING_DESC_F_WRITE) ? 
1212 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1213 } 1214 } 1215 1216 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1217 const struct vring_packed_desc *desc) 1218 { 1219 u16 flags; 1220 1221 if (!vq->use_dma_api) 1222 return; 1223 1224 flags = le16_to_cpu(desc->flags); 1225 1226 dma_unmap_page(vring_dma_dev(vq), 1227 le64_to_cpu(desc->addr), 1228 le32_to_cpu(desc->len), 1229 (flags & VRING_DESC_F_WRITE) ? 1230 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1231 } 1232 1233 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1234 gfp_t gfp) 1235 { 1236 struct vring_packed_desc *desc; 1237 1238 /* 1239 * We require lowmem mappings for the descriptors because 1240 * otherwise virt_to_phys will give us bogus addresses in the 1241 * virtqueue. 1242 */ 1243 gfp &= ~__GFP_HIGHMEM; 1244 1245 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1246 1247 return desc; 1248 } 1249 1250 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1251 struct scatterlist *sgs[], 1252 unsigned int total_sg, 1253 unsigned int out_sgs, 1254 unsigned int in_sgs, 1255 void *data, 1256 gfp_t gfp) 1257 { 1258 struct vring_packed_desc *desc; 1259 struct scatterlist *sg; 1260 unsigned int i, n, err_idx; 1261 u16 head, id; 1262 dma_addr_t addr; 1263 1264 head = vq->packed.next_avail_idx; 1265 desc = alloc_indirect_packed(total_sg, gfp); 1266 if (!desc) 1267 return -ENOMEM; 1268 1269 if (unlikely(vq->vq.num_free < 1)) { 1270 pr_debug("Can't add buf len 1 - avail = 0\n"); 1271 kfree(desc); 1272 END_USE(vq); 1273 return -ENOSPC; 1274 } 1275 1276 i = 0; 1277 id = vq->free_head; 1278 BUG_ON(id == vq->packed.vring.num); 1279 1280 for (n = 0; n < out_sgs + in_sgs; n++) { 1281 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1282 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1283 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1284 if (vring_mapping_error(vq, addr)) 1285 goto unmap_release; 1286 1287 desc[i].flags = cpu_to_le16(n < out_sgs ? 1288 0 : VRING_DESC_F_WRITE); 1289 desc[i].addr = cpu_to_le64(addr); 1290 desc[i].len = cpu_to_le32(sg->length); 1291 i++; 1292 } 1293 } 1294 1295 /* Now that the indirect table is filled in, map it. */ 1296 addr = vring_map_single(vq, desc, 1297 total_sg * sizeof(struct vring_packed_desc), 1298 DMA_TO_DEVICE); 1299 if (vring_mapping_error(vq, addr)) 1300 goto unmap_release; 1301 1302 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1303 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1304 sizeof(struct vring_packed_desc)); 1305 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1306 1307 if (vq->use_dma_api) { 1308 vq->packed.desc_extra[id].addr = addr; 1309 vq->packed.desc_extra[id].len = total_sg * 1310 sizeof(struct vring_packed_desc); 1311 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1312 vq->packed.avail_used_flags; 1313 } 1314 1315 /* 1316 * A driver MUST NOT make the first descriptor in the list 1317 * available before all subsequent descriptors comprising 1318 * the list are made available. 1319 */ 1320 virtio_wmb(vq->weak_barriers); 1321 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1322 vq->packed.avail_used_flags); 1323 1324 /* We're using some buffers from the free list. 
*/ 1325 vq->vq.num_free -= 1; 1326 1327 /* Update free pointer */ 1328 n = head + 1; 1329 if (n >= vq->packed.vring.num) { 1330 n = 0; 1331 vq->packed.avail_wrap_counter ^= 1; 1332 vq->packed.avail_used_flags ^= 1333 1 << VRING_PACKED_DESC_F_AVAIL | 1334 1 << VRING_PACKED_DESC_F_USED; 1335 } 1336 vq->packed.next_avail_idx = n; 1337 vq->free_head = vq->packed.desc_extra[id].next; 1338 1339 /* Store token and indirect buffer state. */ 1340 vq->packed.desc_state[id].num = 1; 1341 vq->packed.desc_state[id].data = data; 1342 vq->packed.desc_state[id].indir_desc = desc; 1343 vq->packed.desc_state[id].last = id; 1344 1345 vq->num_added += 1; 1346 1347 pr_debug("Added buffer head %i to %p\n", head, vq); 1348 END_USE(vq); 1349 1350 return 0; 1351 1352 unmap_release: 1353 err_idx = i; 1354 1355 for (i = 0; i < err_idx; i++) 1356 vring_unmap_desc_packed(vq, &desc[i]); 1357 1358 kfree(desc); 1359 1360 END_USE(vq); 1361 return -ENOMEM; 1362 } 1363 1364 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1365 struct scatterlist *sgs[], 1366 unsigned int total_sg, 1367 unsigned int out_sgs, 1368 unsigned int in_sgs, 1369 void *data, 1370 void *ctx, 1371 gfp_t gfp) 1372 { 1373 struct vring_virtqueue *vq = to_vvq(_vq); 1374 struct vring_packed_desc *desc; 1375 struct scatterlist *sg; 1376 unsigned int i, n, c, descs_used, err_idx; 1377 __le16 head_flags, flags; 1378 u16 head, id, prev, curr, avail_used_flags; 1379 int err; 1380 1381 START_USE(vq); 1382 1383 BUG_ON(data == NULL); 1384 BUG_ON(ctx && vq->indirect); 1385 1386 if (unlikely(vq->broken)) { 1387 END_USE(vq); 1388 return -EIO; 1389 } 1390 1391 LAST_ADD_TIME_UPDATE(vq); 1392 1393 BUG_ON(total_sg == 0); 1394 1395 if (virtqueue_use_indirect(vq, total_sg)) { 1396 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1397 in_sgs, data, gfp); 1398 if (err != -ENOMEM) { 1399 END_USE(vq); 1400 return err; 1401 } 1402 1403 /* fall back on direct */ 1404 } 1405 1406 head = vq->packed.next_avail_idx; 1407 avail_used_flags = vq->packed.avail_used_flags; 1408 1409 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1410 1411 desc = vq->packed.vring.desc; 1412 i = head; 1413 descs_used = total_sg; 1414 1415 if (unlikely(vq->vq.num_free < descs_used)) { 1416 pr_debug("Can't add buf len %i - avail = %i\n", 1417 descs_used, vq->vq.num_free); 1418 END_USE(vq); 1419 return -ENOSPC; 1420 } 1421 1422 id = vq->free_head; 1423 BUG_ON(id == vq->packed.vring.num); 1424 1425 curr = id; 1426 c = 0; 1427 for (n = 0; n < out_sgs + in_sgs; n++) { 1428 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1429 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1430 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1431 if (vring_mapping_error(vq, addr)) 1432 goto unmap_release; 1433 1434 flags = cpu_to_le16(vq->packed.avail_used_flags | 1435 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1436 (n < out_sgs ? 
0 : VRING_DESC_F_WRITE)); 1437 if (i == head) 1438 head_flags = flags; 1439 else 1440 desc[i].flags = flags; 1441 1442 desc[i].addr = cpu_to_le64(addr); 1443 desc[i].len = cpu_to_le32(sg->length); 1444 desc[i].id = cpu_to_le16(id); 1445 1446 if (unlikely(vq->use_dma_api)) { 1447 vq->packed.desc_extra[curr].addr = addr; 1448 vq->packed.desc_extra[curr].len = sg->length; 1449 vq->packed.desc_extra[curr].flags = 1450 le16_to_cpu(flags); 1451 } 1452 prev = curr; 1453 curr = vq->packed.desc_extra[curr].next; 1454 1455 if ((unlikely(++i >= vq->packed.vring.num))) { 1456 i = 0; 1457 vq->packed.avail_used_flags ^= 1458 1 << VRING_PACKED_DESC_F_AVAIL | 1459 1 << VRING_PACKED_DESC_F_USED; 1460 } 1461 } 1462 } 1463 1464 if (i < head) 1465 vq->packed.avail_wrap_counter ^= 1; 1466 1467 /* We're using some buffers from the free list. */ 1468 vq->vq.num_free -= descs_used; 1469 1470 /* Update free pointer */ 1471 vq->packed.next_avail_idx = i; 1472 vq->free_head = curr; 1473 1474 /* Store token. */ 1475 vq->packed.desc_state[id].num = descs_used; 1476 vq->packed.desc_state[id].data = data; 1477 vq->packed.desc_state[id].indir_desc = ctx; 1478 vq->packed.desc_state[id].last = prev; 1479 1480 /* 1481 * A driver MUST NOT make the first descriptor in the list 1482 * available before all subsequent descriptors comprising 1483 * the list are made available. 1484 */ 1485 virtio_wmb(vq->weak_barriers); 1486 vq->packed.vring.desc[head].flags = head_flags; 1487 vq->num_added += descs_used; 1488 1489 pr_debug("Added buffer head %i to %p\n", head, vq); 1490 END_USE(vq); 1491 1492 return 0; 1493 1494 unmap_release: 1495 err_idx = i; 1496 i = head; 1497 curr = vq->free_head; 1498 1499 vq->packed.avail_used_flags = avail_used_flags; 1500 1501 for (n = 0; n < total_sg; n++) { 1502 if (i == err_idx) 1503 break; 1504 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1505 curr = vq->packed.desc_extra[curr].next; 1506 i++; 1507 if (i >= vq->packed.vring.num) 1508 i = 0; 1509 } 1510 1511 END_USE(vq); 1512 return -EIO; 1513 } 1514 1515 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1516 { 1517 struct vring_virtqueue *vq = to_vvq(_vq); 1518 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1519 bool needs_kick; 1520 union { 1521 struct { 1522 __le16 off_wrap; 1523 __le16 flags; 1524 }; 1525 u32 u32; 1526 } snapshot; 1527 1528 START_USE(vq); 1529 1530 /* 1531 * We need to expose the new flags value before checking notification 1532 * suppressions. 
1533 */ 1534 virtio_mb(vq->weak_barriers); 1535 1536 old = vq->packed.next_avail_idx - vq->num_added; 1537 new = vq->packed.next_avail_idx; 1538 vq->num_added = 0; 1539 1540 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1541 flags = le16_to_cpu(snapshot.flags); 1542 1543 LAST_ADD_TIME_CHECK(vq); 1544 LAST_ADD_TIME_INVALID(vq); 1545 1546 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1547 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1548 goto out; 1549 } 1550 1551 off_wrap = le16_to_cpu(snapshot.off_wrap); 1552 1553 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1554 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1555 if (wrap_counter != vq->packed.avail_wrap_counter) 1556 event_idx -= vq->packed.vring.num; 1557 1558 needs_kick = vring_need_event(event_idx, new, old); 1559 out: 1560 END_USE(vq); 1561 return needs_kick; 1562 } 1563 1564 static void detach_buf_packed(struct vring_virtqueue *vq, 1565 unsigned int id, void **ctx) 1566 { 1567 struct vring_desc_state_packed *state = NULL; 1568 struct vring_packed_desc *desc; 1569 unsigned int i, curr; 1570 1571 state = &vq->packed.desc_state[id]; 1572 1573 /* Clear data ptr. */ 1574 state->data = NULL; 1575 1576 vq->packed.desc_extra[state->last].next = vq->free_head; 1577 vq->free_head = id; 1578 vq->vq.num_free += state->num; 1579 1580 if (unlikely(vq->use_dma_api)) { 1581 curr = id; 1582 for (i = 0; i < state->num; i++) { 1583 vring_unmap_extra_packed(vq, 1584 &vq->packed.desc_extra[curr]); 1585 curr = vq->packed.desc_extra[curr].next; 1586 } 1587 } 1588 1589 if (vq->indirect) { 1590 u32 len; 1591 1592 /* Free the indirect table, if any, now that it's unmapped. */ 1593 desc = state->indir_desc; 1594 if (!desc) 1595 return; 1596 1597 if (vq->use_dma_api) { 1598 len = vq->packed.desc_extra[id].len; 1599 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1600 i++) 1601 vring_unmap_desc_packed(vq, &desc[i]); 1602 } 1603 kfree(desc); 1604 state->indir_desc = NULL; 1605 } else if (ctx) { 1606 *ctx = state->indir_desc; 1607 } 1608 } 1609 1610 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1611 u16 idx, bool used_wrap_counter) 1612 { 1613 bool avail, used; 1614 u16 flags; 1615 1616 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1617 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1618 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1619 1620 return avail == used && used == used_wrap_counter; 1621 } 1622 1623 static bool more_used_packed(const struct vring_virtqueue *vq) 1624 { 1625 u16 last_used; 1626 u16 last_used_idx; 1627 bool used_wrap_counter; 1628 1629 last_used_idx = READ_ONCE(vq->last_used_idx); 1630 last_used = packed_last_used(last_used_idx); 1631 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1632 return is_used_desc_packed(vq, last_used, used_wrap_counter); 1633 } 1634 1635 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1636 unsigned int *len, 1637 void **ctx) 1638 { 1639 struct vring_virtqueue *vq = to_vvq(_vq); 1640 u16 last_used, id, last_used_idx; 1641 bool used_wrap_counter; 1642 void *ret; 1643 1644 START_USE(vq); 1645 1646 if (unlikely(vq->broken)) { 1647 END_USE(vq); 1648 return NULL; 1649 } 1650 1651 if (!more_used_packed(vq)) { 1652 pr_debug("No more buffers in queue\n"); 1653 END_USE(vq); 1654 return NULL; 1655 } 1656 1657 /* Only get used elements after they have been exposed by host. 
*/ 1658 virtio_rmb(vq->weak_barriers); 1659 1660 last_used_idx = READ_ONCE(vq->last_used_idx); 1661 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1662 last_used = packed_last_used(last_used_idx); 1663 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1664 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1665 1666 if (unlikely(id >= vq->packed.vring.num)) { 1667 BAD_RING(vq, "id %u out of range\n", id); 1668 return NULL; 1669 } 1670 if (unlikely(!vq->packed.desc_state[id].data)) { 1671 BAD_RING(vq, "id %u is not a head!\n", id); 1672 return NULL; 1673 } 1674 1675 /* detach_buf_packed clears data, so grab it now. */ 1676 ret = vq->packed.desc_state[id].data; 1677 detach_buf_packed(vq, id, ctx); 1678 1679 last_used += vq->packed.desc_state[id].num; 1680 if (unlikely(last_used >= vq->packed.vring.num)) { 1681 last_used -= vq->packed.vring.num; 1682 used_wrap_counter ^= 1; 1683 } 1684 1685 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1686 WRITE_ONCE(vq->last_used_idx, last_used); 1687 1688 /* 1689 * If we expect an interrupt for the next entry, tell host 1690 * by writing event index and flush out the write before 1691 * the read in the next get_buf call. 1692 */ 1693 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1694 virtio_store_mb(vq->weak_barriers, 1695 &vq->packed.vring.driver->off_wrap, 1696 cpu_to_le16(vq->last_used_idx)); 1697 1698 LAST_ADD_TIME_INVALID(vq); 1699 1700 END_USE(vq); 1701 return ret; 1702 } 1703 1704 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1705 { 1706 struct vring_virtqueue *vq = to_vvq(_vq); 1707 1708 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1709 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1710 1711 /* 1712 * If device triggered an event already it won't trigger one again: 1713 * no need to disable. 1714 */ 1715 if (vq->event_triggered) 1716 return; 1717 1718 vq->packed.vring.driver->flags = 1719 cpu_to_le16(vq->packed.event_flags_shadow); 1720 } 1721 } 1722 1723 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1724 { 1725 struct vring_virtqueue *vq = to_vvq(_vq); 1726 1727 START_USE(vq); 1728 1729 /* 1730 * We optimistically turn back on interrupts, then check if there was 1731 * more to do. 1732 */ 1733 1734 if (vq->event) { 1735 vq->packed.vring.driver->off_wrap = 1736 cpu_to_le16(vq->last_used_idx); 1737 /* 1738 * We need to update event offset and event wrap 1739 * counter first before updating event flags. 1740 */ 1741 virtio_wmb(vq->weak_barriers); 1742 } 1743 1744 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1745 vq->packed.event_flags_shadow = vq->event ? 
1746 VRING_PACKED_EVENT_FLAG_DESC : 1747 VRING_PACKED_EVENT_FLAG_ENABLE; 1748 vq->packed.vring.driver->flags = 1749 cpu_to_le16(vq->packed.event_flags_shadow); 1750 } 1751 1752 END_USE(vq); 1753 return vq->last_used_idx; 1754 } 1755 1756 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1757 { 1758 struct vring_virtqueue *vq = to_vvq(_vq); 1759 bool wrap_counter; 1760 u16 used_idx; 1761 1762 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1763 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1764 1765 return is_used_desc_packed(vq, used_idx, wrap_counter); 1766 } 1767 1768 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1769 { 1770 struct vring_virtqueue *vq = to_vvq(_vq); 1771 u16 used_idx, wrap_counter, last_used_idx; 1772 u16 bufs; 1773 1774 START_USE(vq); 1775 1776 /* 1777 * We optimistically turn back on interrupts, then check if there was 1778 * more to do. 1779 */ 1780 1781 if (vq->event) { 1782 /* TODO: tune this threshold */ 1783 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1784 last_used_idx = READ_ONCE(vq->last_used_idx); 1785 wrap_counter = packed_used_wrap_counter(last_used_idx); 1786 1787 used_idx = packed_last_used(last_used_idx) + bufs; 1788 if (used_idx >= vq->packed.vring.num) { 1789 used_idx -= vq->packed.vring.num; 1790 wrap_counter ^= 1; 1791 } 1792 1793 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1794 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1795 1796 /* 1797 * We need to update event offset and event wrap 1798 * counter first before updating event flags. 1799 */ 1800 virtio_wmb(vq->weak_barriers); 1801 } 1802 1803 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1804 vq->packed.event_flags_shadow = vq->event ? 1805 VRING_PACKED_EVENT_FLAG_DESC : 1806 VRING_PACKED_EVENT_FLAG_ENABLE; 1807 vq->packed.vring.driver->flags = 1808 cpu_to_le16(vq->packed.event_flags_shadow); 1809 } 1810 1811 /* 1812 * We need to update event suppression structure first 1813 * before re-checking for more used buffers. 1814 */ 1815 virtio_mb(vq->weak_barriers); 1816 1817 last_used_idx = READ_ONCE(vq->last_used_idx); 1818 wrap_counter = packed_used_wrap_counter(last_used_idx); 1819 used_idx = packed_last_used(last_used_idx); 1820 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1821 END_USE(vq); 1822 return false; 1823 } 1824 1825 END_USE(vq); 1826 return true; 1827 } 1828 1829 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1830 { 1831 struct vring_virtqueue *vq = to_vvq(_vq); 1832 unsigned int i; 1833 void *buf; 1834 1835 START_USE(vq); 1836 1837 for (i = 0; i < vq->packed.vring.num; i++) { 1838 if (!vq->packed.desc_state[i].data) 1839 continue; 1840 /* detach_buf clears data, so grab it now. */ 1841 buf = vq->packed.desc_state[i].data; 1842 detach_buf_packed(vq, i, NULL); 1843 END_USE(vq); 1844 return buf; 1845 } 1846 /* That should have freed everything. 
*/ 1847 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1848 1849 END_USE(vq); 1850 return NULL; 1851 } 1852 1853 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) 1854 { 1855 struct vring_desc_extra *desc_extra; 1856 unsigned int i; 1857 1858 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1859 GFP_KERNEL); 1860 if (!desc_extra) 1861 return NULL; 1862 1863 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1864 1865 for (i = 0; i < num - 1; i++) 1866 desc_extra[i].next = i + 1; 1867 1868 return desc_extra; 1869 } 1870 1871 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, 1872 struct virtio_device *vdev, 1873 struct device *dma_dev) 1874 { 1875 if (vring_packed->vring.desc) 1876 vring_free_queue(vdev, vring_packed->ring_size_in_bytes, 1877 vring_packed->vring.desc, 1878 vring_packed->ring_dma_addr, 1879 dma_dev); 1880 1881 if (vring_packed->vring.driver) 1882 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 1883 vring_packed->vring.driver, 1884 vring_packed->driver_event_dma_addr, 1885 dma_dev); 1886 1887 if (vring_packed->vring.device) 1888 vring_free_queue(vdev, vring_packed->event_size_in_bytes, 1889 vring_packed->vring.device, 1890 vring_packed->device_event_dma_addr, 1891 dma_dev); 1892 1893 kfree(vring_packed->desc_state); 1894 kfree(vring_packed->desc_extra); 1895 } 1896 1897 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, 1898 struct virtio_device *vdev, 1899 u32 num, struct device *dma_dev) 1900 { 1901 struct vring_packed_desc *ring; 1902 struct vring_packed_desc_event *driver, *device; 1903 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1904 size_t ring_size_in_bytes, event_size_in_bytes; 1905 1906 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1907 1908 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1909 &ring_dma_addr, 1910 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1911 dma_dev); 1912 if (!ring) 1913 goto err; 1914 1915 vring_packed->vring.desc = ring; 1916 vring_packed->ring_dma_addr = ring_dma_addr; 1917 vring_packed->ring_size_in_bytes = ring_size_in_bytes; 1918 1919 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1920 1921 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1922 &driver_event_dma_addr, 1923 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1924 dma_dev); 1925 if (!driver) 1926 goto err; 1927 1928 vring_packed->vring.driver = driver; 1929 vring_packed->event_size_in_bytes = event_size_in_bytes; 1930 vring_packed->driver_event_dma_addr = driver_event_dma_addr; 1931 1932 device = vring_alloc_queue(vdev, event_size_in_bytes, 1933 &device_event_dma_addr, 1934 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1935 dma_dev); 1936 if (!device) 1937 goto err; 1938 1939 vring_packed->vring.device = device; 1940 vring_packed->device_event_dma_addr = device_event_dma_addr; 1941 1942 vring_packed->vring.num = num; 1943 1944 return 0; 1945 1946 err: 1947 vring_free_packed(vring_packed, vdev, dma_dev); 1948 return -ENOMEM; 1949 } 1950 1951 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) 1952 { 1953 struct vring_desc_state_packed *state; 1954 struct vring_desc_extra *extra; 1955 u32 num = vring_packed->vring.num; 1956 1957 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); 1958 if (!state) 1959 goto err_desc_state; 1960 1961 memset(state, 0, num * sizeof(struct vring_desc_state_packed)); 1962 1963 extra = vring_alloc_desc_extra(num); 1964 if (!extra) 
1965 goto err_desc_extra; 1966 1967 vring_packed->desc_state = state; 1968 vring_packed->desc_extra = extra; 1969 1970 return 0; 1971 1972 err_desc_extra: 1973 kfree(state); 1974 err_desc_state: 1975 return -ENOMEM; 1976 } 1977 1978 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, 1979 bool callback) 1980 { 1981 vring_packed->next_avail_idx = 0; 1982 vring_packed->avail_wrap_counter = 1; 1983 vring_packed->event_flags_shadow = 0; 1984 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1985 1986 /* No callback? Tell other side not to bother us. */ 1987 if (!callback) { 1988 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1989 vring_packed->vring.driver->flags = 1990 cpu_to_le16(vring_packed->event_flags_shadow); 1991 } 1992 } 1993 1994 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, 1995 struct vring_virtqueue_packed *vring_packed) 1996 { 1997 vq->packed = *vring_packed; 1998 1999 /* Put everything in free lists. */ 2000 vq->free_head = 0; 2001 } 2002 2003 static void virtqueue_reinit_packed(struct vring_virtqueue *vq) 2004 { 2005 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); 2006 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); 2007 2008 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */ 2009 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); 2010 2011 virtqueue_init(vq, vq->packed.vring.num); 2012 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); 2013 } 2014 2015 static struct virtqueue *vring_create_virtqueue_packed( 2016 unsigned int index, 2017 unsigned int num, 2018 unsigned int vring_align, 2019 struct virtio_device *vdev, 2020 bool weak_barriers, 2021 bool may_reduce_num, 2022 bool context, 2023 bool (*notify)(struct virtqueue *), 2024 void (*callback)(struct virtqueue *), 2025 const char *name, 2026 struct device *dma_dev) 2027 { 2028 struct vring_virtqueue_packed vring_packed = {}; 2029 struct vring_virtqueue *vq; 2030 int err; 2031 2032 if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) 2033 goto err_ring; 2034 2035 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2036 if (!vq) 2037 goto err_vq; 2038 2039 vq->vq.callback = callback; 2040 vq->vq.vdev = vdev; 2041 vq->vq.name = name; 2042 vq->vq.index = index; 2043 vq->vq.reset = false; 2044 vq->we_own_ring = true; 2045 vq->notify = notify; 2046 vq->weak_barriers = weak_barriers; 2047 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2048 vq->broken = true; 2049 #else 2050 vq->broken = false; 2051 #endif 2052 vq->packed_ring = true; 2053 vq->dma_dev = dma_dev; 2054 vq->use_dma_api = vring_use_dma_api(vdev); 2055 2056 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2057 !context; 2058 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2059 2060 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2061 vq->weak_barriers = false; 2062 2063 err = vring_alloc_state_extra_packed(&vring_packed); 2064 if (err) 2065 goto err_state_extra; 2066 2067 virtqueue_vring_init_packed(&vring_packed, !!callback); 2068 2069 virtqueue_init(vq, num); 2070 virtqueue_vring_attach_packed(vq, &vring_packed); 2071 2072 spin_lock(&vdev->vqs_list_lock); 2073 list_add_tail(&vq->vq.list, &vdev->vqs); 2074 spin_unlock(&vdev->vqs_list_lock); 2075 return &vq->vq; 2076 2077 err_state_extra: 2078 kfree(vq); 2079 err_vq: 2080 vring_free_packed(&vring_packed, vdev, dma_dev); 2081 err_ring: 2082 return NULL; 2083 } 2084 2085 static int 
virtqueue_resize_packed(struct virtqueue *_vq, u32 num) 2086 { 2087 struct vring_virtqueue_packed vring_packed = {}; 2088 struct vring_virtqueue *vq = to_vvq(_vq); 2089 struct virtio_device *vdev = _vq->vdev; 2090 int err; 2091 2092 if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) 2093 goto err_ring; 2094 2095 err = vring_alloc_state_extra_packed(&vring_packed); 2096 if (err) 2097 goto err_state_extra; 2098 2099 vring_free(&vq->vq); 2100 2101 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); 2102 2103 virtqueue_init(vq, vring_packed.vring.num); 2104 virtqueue_vring_attach_packed(vq, &vring_packed); 2105 2106 return 0; 2107 2108 err_state_extra: 2109 vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); 2110 err_ring: 2111 virtqueue_reinit_packed(vq); 2112 return -ENOMEM; 2113 } 2114 2115 2116 /* 2117 * Generic functions and exported symbols. 2118 */ 2119 2120 static inline int virtqueue_add(struct virtqueue *_vq, 2121 struct scatterlist *sgs[], 2122 unsigned int total_sg, 2123 unsigned int out_sgs, 2124 unsigned int in_sgs, 2125 void *data, 2126 void *ctx, 2127 gfp_t gfp) 2128 { 2129 struct vring_virtqueue *vq = to_vvq(_vq); 2130 2131 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 2132 out_sgs, in_sgs, data, ctx, gfp) : 2133 virtqueue_add_split(_vq, sgs, total_sg, 2134 out_sgs, in_sgs, data, ctx, gfp); 2135 } 2136 2137 /** 2138 * virtqueue_add_sgs - expose buffers to other end 2139 * @_vq: the struct virtqueue we're talking about. 2140 * @sgs: array of terminated scatterlists. 2141 * @out_sgs: the number of scatterlists readable by other side 2142 * @in_sgs: the number of scatterlists which are writable (after readable ones) 2143 * @data: the token identifying the buffer. 2144 * @gfp: how to do memory allocations (if necessary). 2145 * 2146 * Caller must ensure we don't call this with other virtqueue operations 2147 * at the same time (except where noted). 2148 * 2149 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2150 */ 2151 int virtqueue_add_sgs(struct virtqueue *_vq, 2152 struct scatterlist *sgs[], 2153 unsigned int out_sgs, 2154 unsigned int in_sgs, 2155 void *data, 2156 gfp_t gfp) 2157 { 2158 unsigned int i, total_sg = 0; 2159 2160 /* Count them first. */ 2161 for (i = 0; i < out_sgs + in_sgs; i++) { 2162 struct scatterlist *sg; 2163 2164 for (sg = sgs[i]; sg; sg = sg_next(sg)) 2165 total_sg++; 2166 } 2167 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 2168 data, NULL, gfp); 2169 } 2170 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 2171 2172 /** 2173 * virtqueue_add_outbuf - expose output buffers to other end 2174 * @vq: the struct virtqueue we're talking about. 2175 * @sg: scatterlist (must be well-formed and terminated!) 2176 * @num: the number of entries in @sg readable by other side 2177 * @data: the token identifying the buffer. 2178 * @gfp: how to do memory allocations (if necessary). 2179 * 2180 * Caller must ensure we don't call this with other virtqueue operations 2181 * at the same time (except where noted). 2182 * 2183 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2184 */ 2185 int virtqueue_add_outbuf(struct virtqueue *vq, 2186 struct scatterlist *sg, unsigned int num, 2187 void *data, 2188 gfp_t gfp) 2189 { 2190 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 2191 } 2192 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 2193 2194 /** 2195 * virtqueue_add_inbuf - expose input buffers to other end 2196 * @vq: the struct virtqueue we're talking about. 
2197 * @sg: scatterlist (must be well-formed and terminated!) 2198 * @num: the number of entries in @sg writable by other side 2199 * @data: the token identifying the buffer. 2200 * @gfp: how to do memory allocations (if necessary). 2201 * 2202 * Caller must ensure we don't call this with other virtqueue operations 2203 * at the same time (except where noted). 2204 * 2205 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2206 */ 2207 int virtqueue_add_inbuf(struct virtqueue *vq, 2208 struct scatterlist *sg, unsigned int num, 2209 void *data, 2210 gfp_t gfp) 2211 { 2212 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 2213 } 2214 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 2215 2216 /** 2217 * virtqueue_add_inbuf_ctx - expose input buffers to other end 2218 * @vq: the struct virtqueue we're talking about. 2219 * @sg: scatterlist (must be well-formed and terminated!) 2220 * @num: the number of entries in @sg writable by other side 2221 * @data: the token identifying the buffer. 2222 * @ctx: extra context for the token 2223 * @gfp: how to do memory allocations (if necessary). 2224 * 2225 * Caller must ensure we don't call this with other virtqueue operations 2226 * at the same time (except where noted). 2227 * 2228 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2229 */ 2230 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 2231 struct scatterlist *sg, unsigned int num, 2232 void *data, 2233 void *ctx, 2234 gfp_t gfp) 2235 { 2236 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 2237 } 2238 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 2239 2240 /** 2241 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 2242 * @_vq: the struct virtqueue 2243 * 2244 * Instead of virtqueue_kick(), you can do: 2245 * if (virtqueue_kick_prepare(vq)) 2246 * virtqueue_notify(vq); 2247 * 2248 * This is sometimes useful because the virtqueue_kick_prepare() needs 2249 * to be serialized, but the actual virtqueue_notify() call does not. 2250 */ 2251 bool virtqueue_kick_prepare(struct virtqueue *_vq) 2252 { 2253 struct vring_virtqueue *vq = to_vvq(_vq); 2254 2255 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 2256 virtqueue_kick_prepare_split(_vq); 2257 } 2258 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 2259 2260 /** 2261 * virtqueue_notify - second half of split virtqueue_kick call. 2262 * @_vq: the struct virtqueue 2263 * 2264 * This does not need to be serialized. 2265 * 2266 * Returns false if host notify failed or queue is broken, otherwise true. 2267 */ 2268 bool virtqueue_notify(struct virtqueue *_vq) 2269 { 2270 struct vring_virtqueue *vq = to_vvq(_vq); 2271 2272 if (unlikely(vq->broken)) 2273 return false; 2274 2275 /* Prod other side to tell it about changes. */ 2276 if (!vq->notify(_vq)) { 2277 vq->broken = true; 2278 return false; 2279 } 2280 return true; 2281 } 2282 EXPORT_SYMBOL_GPL(virtqueue_notify); 2283 2284 /** 2285 * virtqueue_kick - update after add_buf 2286 * @vq: the struct virtqueue 2287 * 2288 * After one or more virtqueue_add_* calls, invoke this to kick 2289 * the other side. 2290 * 2291 * Caller must ensure we don't call this with other virtqueue 2292 * operations at the same time (except where noted). 2293 * 2294 * Returns false if kick failed, otherwise true. 
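 *
 * A minimal illustrative sketch of the usual add-then-kick pattern (buf,
 * len and vq below are hypothetical driver-side variables, not part of
 * this file):
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC) == 0)
 *		virtqueue_kick(vq);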
2295 */ 2296 bool virtqueue_kick(struct virtqueue *vq) 2297 { 2298 if (virtqueue_kick_prepare(vq)) 2299 return virtqueue_notify(vq); 2300 return true; 2301 } 2302 EXPORT_SYMBOL_GPL(virtqueue_kick); 2303 2304 /** 2305 * virtqueue_get_buf_ctx - get the next used buffer 2306 * @_vq: the struct virtqueue we're talking about. 2307 * @len: the length written into the buffer 2308 * @ctx: extra context for the token 2309 * 2310 * If the device wrote data into the buffer, @len will be set to the 2311 * amount written. This means you don't need to clear the buffer 2312 * beforehand to ensure there's no data leakage in the case of short 2313 * writes. 2314 * 2315 * Caller must ensure we don't call this with other virtqueue 2316 * operations at the same time (except where noted). 2317 * 2318 * Returns NULL if there are no used buffers, or the "data" token 2319 * handed to virtqueue_add_*(). 2320 */ 2321 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 2322 void **ctx) 2323 { 2324 struct vring_virtqueue *vq = to_vvq(_vq); 2325 2326 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 2327 virtqueue_get_buf_ctx_split(_vq, len, ctx); 2328 } 2329 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2330 2331 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2332 { 2333 return virtqueue_get_buf_ctx(_vq, len, NULL); 2334 } 2335 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2336 /** 2337 * virtqueue_disable_cb - disable callbacks 2338 * @_vq: the struct virtqueue we're talking about. 2339 * 2340 * Note that this is not necessarily synchronous, hence unreliable and only 2341 * useful as an optimization. 2342 * 2343 * Unlike other operations, this need not be serialized. 2344 */ 2345 void virtqueue_disable_cb(struct virtqueue *_vq) 2346 { 2347 struct vring_virtqueue *vq = to_vvq(_vq); 2348 2349 if (vq->packed_ring) 2350 virtqueue_disable_cb_packed(_vq); 2351 else 2352 virtqueue_disable_cb_split(_vq); 2353 } 2354 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2355 2356 /** 2357 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2358 * @_vq: the struct virtqueue we're talking about. 2359 * 2360 * This re-enables callbacks; it returns current queue state 2361 * in an opaque unsigned value. This value should be later tested by 2362 * virtqueue_poll, to detect a possible race between the driver checking for 2363 * more work, and enabling callbacks. 2364 * 2365 * Caller must ensure we don't call this with other virtqueue 2366 * operations at the same time (except where noted). 2367 */ 2368 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2369 { 2370 struct vring_virtqueue *vq = to_vvq(_vq); 2371 2372 if (vq->event_triggered) 2373 vq->event_triggered = false; 2374 2375 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2376 virtqueue_enable_cb_prepare_split(_vq); 2377 } 2378 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2379 2380 /** 2381 * virtqueue_poll - query pending used buffers 2382 * @_vq: the struct virtqueue we're talking about. 2383 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2384 * 2385 * Returns "true" if there are pending used buffers in the queue. 2386 * 2387 * This does not need to be serialized. 2388 */ 2389 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2390 { 2391 struct vring_virtqueue *vq = to_vvq(_vq); 2392 2393 if (unlikely(vq->broken)) 2394 return false; 2395 2396 virtio_mb(vq->weak_barriers); 2397 return vq->packed_ring ? 
virtqueue_poll_packed(_vq, last_used_idx) : 2398 virtqueue_poll_split(_vq, last_used_idx); 2399 } 2400 EXPORT_SYMBOL_GPL(virtqueue_poll); 2401 2402 /** 2403 * virtqueue_enable_cb - restart callbacks after disable_cb. 2404 * @_vq: the struct virtqueue we're talking about. 2405 * 2406 * This re-enables callbacks; it returns "false" if there are pending 2407 * buffers in the queue, to detect a possible race between the driver 2408 * checking for more work, and enabling callbacks. 2409 * 2410 * Caller must ensure we don't call this with other virtqueue 2411 * operations at the same time (except where noted). 2412 */ 2413 bool virtqueue_enable_cb(struct virtqueue *_vq) 2414 { 2415 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2416 2417 return !virtqueue_poll(_vq, last_used_idx); 2418 } 2419 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2420 2421 /** 2422 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2423 * @_vq: the struct virtqueue we're talking about. 2424 * 2425 * This re-enables callbacks but hints to the other side to delay 2426 * interrupts until most of the available buffers have been processed; 2427 * it returns "false" if there are many pending buffers in the queue, 2428 * to detect a possible race between the driver checking for more work, 2429 * and enabling callbacks. 2430 * 2431 * Caller must ensure we don't call this with other virtqueue 2432 * operations at the same time (except where noted). 2433 */ 2434 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2435 { 2436 struct vring_virtqueue *vq = to_vvq(_vq); 2437 2438 if (vq->event_triggered) 2439 vq->event_triggered = false; 2440 2441 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2442 virtqueue_enable_cb_delayed_split(_vq); 2443 } 2444 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2445 2446 /** 2447 * virtqueue_detach_unused_buf - detach first unused buffer 2448 * @_vq: the struct virtqueue we're talking about. 2449 * 2450 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2451 * This is not valid on an active queue; it is useful for device 2452 * shutdown or the reset queue. 2453 */ 2454 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2455 { 2456 struct vring_virtqueue *vq = to_vvq(_vq); 2457 2458 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2459 virtqueue_detach_unused_buf_split(_vq); 2460 } 2461 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2462 2463 static inline bool more_used(const struct vring_virtqueue *vq) 2464 { 2465 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2466 } 2467 2468 /** 2469 * vring_interrupt - notify a virtqueue on an interrupt 2470 * @irq: the IRQ number (ignored) 2471 * @_vq: the struct virtqueue to notify 2472 * 2473 * Calls the callback function of @_vq to process the virtqueue 2474 * notification. 2475 */ 2476 irqreturn_t vring_interrupt(int irq, void *_vq) 2477 { 2478 struct vring_virtqueue *vq = to_vvq(_vq); 2479 2480 if (!more_used(vq)) { 2481 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2482 return IRQ_NONE; 2483 } 2484 2485 if (unlikely(vq->broken)) { 2486 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2487 dev_warn_once(&vq->vq.vdev->dev, 2488 "virtio vring IRQ raised before DRIVER_OK"); 2489 return IRQ_NONE; 2490 #else 2491 return IRQ_HANDLED; 2492 #endif 2493 } 2494 2495 /* Just a hint for performance: so it's ok that this can be racy! 
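	 * (Worst case for a racy update is a redundant event-flag write or
	 * one extra interrupt; delivery of used buffers never depends on
	 * event_triggered.)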
*/ 2496 if (vq->event) 2497 vq->event_triggered = true; 2498 2499 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2500 if (vq->vq.callback) 2501 vq->vq.callback(&vq->vq); 2502 2503 return IRQ_HANDLED; 2504 } 2505 EXPORT_SYMBOL_GPL(vring_interrupt); 2506 2507 /* Only available for split ring */ 2508 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 2509 struct vring_virtqueue_split *vring_split, 2510 struct virtio_device *vdev, 2511 bool weak_barriers, 2512 bool context, 2513 bool (*notify)(struct virtqueue *), 2514 void (*callback)(struct virtqueue *), 2515 const char *name, 2516 struct device *dma_dev) 2517 { 2518 struct vring_virtqueue *vq; 2519 int err; 2520 2521 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2522 return NULL; 2523 2524 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2525 if (!vq) 2526 return NULL; 2527 2528 vq->packed_ring = false; 2529 vq->vq.callback = callback; 2530 vq->vq.vdev = vdev; 2531 vq->vq.name = name; 2532 vq->vq.index = index; 2533 vq->vq.reset = false; 2534 vq->we_own_ring = false; 2535 vq->notify = notify; 2536 vq->weak_barriers = weak_barriers; 2537 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2538 vq->broken = true; 2539 #else 2540 vq->broken = false; 2541 #endif 2542 vq->dma_dev = dma_dev; 2543 vq->use_dma_api = vring_use_dma_api(vdev); 2544 2545 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2546 !context; 2547 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2548 2549 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2550 vq->weak_barriers = false; 2551 2552 err = vring_alloc_state_extra_split(vring_split); 2553 if (err) { 2554 kfree(vq); 2555 return NULL; 2556 } 2557 2558 virtqueue_vring_init_split(vring_split, vq); 2559 2560 virtqueue_init(vq, vring_split->vring.num); 2561 virtqueue_vring_attach_split(vq, vring_split); 2562 2563 spin_lock(&vdev->vqs_list_lock); 2564 list_add_tail(&vq->vq.list, &vdev->vqs); 2565 spin_unlock(&vdev->vqs_list_lock); 2566 return &vq->vq; 2567 } 2568 2569 struct virtqueue *vring_create_virtqueue( 2570 unsigned int index, 2571 unsigned int num, 2572 unsigned int vring_align, 2573 struct virtio_device *vdev, 2574 bool weak_barriers, 2575 bool may_reduce_num, 2576 bool context, 2577 bool (*notify)(struct virtqueue *), 2578 void (*callback)(struct virtqueue *), 2579 const char *name) 2580 { 2581 2582 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2583 return vring_create_virtqueue_packed(index, num, vring_align, 2584 vdev, weak_barriers, may_reduce_num, 2585 context, notify, callback, name, vdev->dev.parent); 2586 2587 return vring_create_virtqueue_split(index, num, vring_align, 2588 vdev, weak_barriers, may_reduce_num, 2589 context, notify, callback, name, vdev->dev.parent); 2590 } 2591 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2592 2593 struct virtqueue *vring_create_virtqueue_dma( 2594 unsigned int index, 2595 unsigned int num, 2596 unsigned int vring_align, 2597 struct virtio_device *vdev, 2598 bool weak_barriers, 2599 bool may_reduce_num, 2600 bool context, 2601 bool (*notify)(struct virtqueue *), 2602 void (*callback)(struct virtqueue *), 2603 const char *name, 2604 struct device *dma_dev) 2605 { 2606 2607 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2608 return vring_create_virtqueue_packed(index, num, vring_align, 2609 vdev, weak_barriers, may_reduce_num, 2610 context, notify, callback, name, dma_dev); 2611 2612 return vring_create_virtqueue_split(index, num, vring_align, 2613 vdev, weak_barriers, may_reduce_num, 2614 context, 
notify, callback, name, dma_dev); 2615 } 2616 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); 2617 2618 /** 2619 * virtqueue_resize - resize the vring of vq 2620 * @_vq: the struct virtqueue we're talking about. 2621 * @num: new ring size 2622 * @recycle: callback to recycle buffers that are no longer used 2623 * 2624 * When a new vring really does need to be created, the current vq is first put 2625 * into the reset state and the passed callback is invoked to recycle every 2626 * buffer that is no longer used. The old vring is released only after the new 2627 * vring has been created successfully. 2628 * 2629 * Caller must ensure we don't call this with other virtqueue operations 2630 * at the same time (except where noted). 2631 * 2632 * Returns zero or a negative error. 2633 * 0: success. 2634 * -ENOMEM: Failed to allocate a new ring; the vq falls back to the original 2635 * ring size and can still work normally 2636 * -EBUSY: Failed to sync with the device; the vq may not work properly 2637 * -ENOENT: Transport or device not supported 2638 * -E2BIG/-EINVAL: @num is invalid 2639 * -EPERM: Operation not permitted 2640 * 2641 */ 2642 int virtqueue_resize(struct virtqueue *_vq, u32 num, 2643 void (*recycle)(struct virtqueue *vq, void *buf)) 2644 { 2645 struct vring_virtqueue *vq = to_vvq(_vq); 2646 struct virtio_device *vdev = vq->vq.vdev; 2647 void *buf; 2648 int err; 2649 2650 if (!vq->we_own_ring) 2651 return -EPERM; 2652 2653 if (num > vq->vq.num_max) 2654 return -E2BIG; 2655 2656 if (!num) 2657 return -EINVAL; 2658 2659 if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) 2660 return 0; 2661 2662 if (!vdev->config->disable_vq_and_reset) 2663 return -ENOENT; 2664 2665 if (!vdev->config->enable_vq_after_reset) 2666 return -ENOENT; 2667 2668 err = vdev->config->disable_vq_and_reset(_vq); 2669 if (err) 2670 return err; 2671 2672 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) 2673 recycle(_vq, buf); 2674 2675 if (vq->packed_ring) 2676 err = virtqueue_resize_packed(_vq, num); 2677 else 2678 err = virtqueue_resize_split(_vq, num); 2679 2680 if (vdev->config->enable_vq_after_reset(_vq)) 2681 return -EBUSY; 2682 2683 return err; 2684 } 2685 EXPORT_SYMBOL_GPL(virtqueue_resize); 2686 2687 /* Only available for split ring */ 2688 struct virtqueue *vring_new_virtqueue(unsigned int index, 2689 unsigned int num, 2690 unsigned int vring_align, 2691 struct virtio_device *vdev, 2692 bool weak_barriers, 2693 bool context, 2694 void *pages, 2695 bool (*notify)(struct virtqueue *vq), 2696 void (*callback)(struct virtqueue *vq), 2697 const char *name) 2698 { 2699 struct vring_virtqueue_split vring_split = {}; 2700 2701 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2702 return NULL; 2703 2704 vring_init(&vring_split.vring, num, pages, vring_align); 2705 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 2706 context, notify, callback, name, 2707 vdev->dev.parent); 2708 } 2709 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2710 2711 static void vring_free(struct virtqueue *_vq) 2712 { 2713 struct vring_virtqueue *vq = to_vvq(_vq); 2714 2715 if (vq->we_own_ring) { 2716 if (vq->packed_ring) { 2717 vring_free_queue(vq->vq.vdev, 2718 vq->packed.ring_size_in_bytes, 2719 vq->packed.vring.desc, 2720 vq->packed.ring_dma_addr, 2721 vring_dma_dev(vq)); 2722 2723 vring_free_queue(vq->vq.vdev, 2724 vq->packed.event_size_in_bytes, 2725 vq->packed.vring.driver, 2726 vq->packed.driver_event_dma_addr, 2727 vring_dma_dev(vq)); 2728 2729 vring_free_queue(vq->vq.vdev, 2730 vq->packed.event_size_in_bytes, 2731
vq->packed.vring.device, 2732 vq->packed.device_event_dma_addr, 2733 vring_dma_dev(vq)); 2734 2735 kfree(vq->packed.desc_state); 2736 kfree(vq->packed.desc_extra); 2737 } else { 2738 vring_free_queue(vq->vq.vdev, 2739 vq->split.queue_size_in_bytes, 2740 vq->split.vring.desc, 2741 vq->split.queue_dma_addr, 2742 vring_dma_dev(vq)); 2743 } 2744 } 2745 if (!vq->packed_ring) { 2746 kfree(vq->split.desc_state); 2747 kfree(vq->split.desc_extra); 2748 } 2749 } 2750 2751 void vring_del_virtqueue(struct virtqueue *_vq) 2752 { 2753 struct vring_virtqueue *vq = to_vvq(_vq); 2754 2755 spin_lock(&vq->vq.vdev->vqs_list_lock); 2756 list_del(&_vq->list); 2757 spin_unlock(&vq->vq.vdev->vqs_list_lock); 2758 2759 vring_free(_vq); 2760 2761 kfree(vq); 2762 } 2763 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2764 2765 u32 vring_notification_data(struct virtqueue *_vq) 2766 { 2767 struct vring_virtqueue *vq = to_vvq(_vq); 2768 u16 next; 2769 2770 if (vq->packed_ring) 2771 next = (vq->packed.next_avail_idx & 2772 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | 2773 vq->packed.avail_wrap_counter << 2774 VRING_PACKED_EVENT_F_WRAP_CTR; 2775 else 2776 next = vq->split.avail_idx_shadow; 2777 2778 return next << 16 | _vq->index; 2779 } 2780 EXPORT_SYMBOL_GPL(vring_notification_data); 2781 2782 /* Manipulates transport-specific feature bits. */ 2783 void vring_transport_features(struct virtio_device *vdev) 2784 { 2785 unsigned int i; 2786 2787 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2788 switch (i) { 2789 case VIRTIO_RING_F_INDIRECT_DESC: 2790 break; 2791 case VIRTIO_RING_F_EVENT_IDX: 2792 break; 2793 case VIRTIO_F_VERSION_1: 2794 break; 2795 case VIRTIO_F_ACCESS_PLATFORM: 2796 break; 2797 case VIRTIO_F_RING_PACKED: 2798 break; 2799 case VIRTIO_F_ORDER_PLATFORM: 2800 break; 2801 case VIRTIO_F_NOTIFICATION_DATA: 2802 break; 2803 default: 2804 /* We don't understand this bit. */ 2805 __virtio_clear_bit(vdev, i); 2806 } 2807 } 2808 } 2809 EXPORT_SYMBOL_GPL(vring_transport_features); 2810 2811 /** 2812 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2813 * @_vq: the struct virtqueue containing the vring of interest. 2814 * 2815 * Returns the size of the vring. This is mainly used for boasting to 2816 * userspace. Unlike other operations, this need not be serialized. 2817 */ 2818 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) 2819 { 2820 2821 const struct vring_virtqueue *vq = to_vvq(_vq); 2822 2823 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2824 } 2825 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2826 2827 /* 2828 * This function should only be called by the core, not directly by the driver. 2829 */ 2830 void __virtqueue_break(struct virtqueue *_vq) 2831 { 2832 struct vring_virtqueue *vq = to_vvq(_vq); 2833 2834 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2835 WRITE_ONCE(vq->broken, true); 2836 } 2837 EXPORT_SYMBOL_GPL(__virtqueue_break); 2838 2839 /* 2840 * This function should only be called by the core, not directly by the driver. 2841 */ 2842 void __virtqueue_unbreak(struct virtqueue *_vq) 2843 { 2844 struct vring_virtqueue *vq = to_vvq(_vq); 2845 2846 /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
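	 * (READ_ONCE()/WRITE_ONCE() only keep the flag access tear-free;
	 * they do not order it against other ring state.)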
*/ 2847 WRITE_ONCE(vq->broken, false); 2848 } 2849 EXPORT_SYMBOL_GPL(__virtqueue_unbreak); 2850 2851 bool virtqueue_is_broken(const struct virtqueue *_vq) 2852 { 2853 const struct vring_virtqueue *vq = to_vvq(_vq); 2854 2855 return READ_ONCE(vq->broken); 2856 } 2857 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2858 2859 /* 2860 * This should prevent the device from being used, allowing drivers to 2861 * recover. You may need to grab appropriate locks to flush the write to vq->broken. 2862 */ 2863 void virtio_break_device(struct virtio_device *dev) 2864 { 2865 struct virtqueue *_vq; 2866 2867 spin_lock(&dev->vqs_list_lock); 2868 list_for_each_entry(_vq, &dev->vqs, list) { 2869 struct vring_virtqueue *vq = to_vvq(_vq); 2870 2871 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2872 WRITE_ONCE(vq->broken, true); 2873 } 2874 spin_unlock(&dev->vqs_list_lock); 2875 } 2876 EXPORT_SYMBOL_GPL(virtio_break_device); 2877 2878 /* 2879 * This should allow the device to be used by the driver. You may 2880 * need to grab appropriate locks to flush the write to 2881 * vq->broken. This should only be used in specific cases, e.g. 2882 * probing and restoring. This function should only be called by the 2883 * core, not directly by the driver. 2884 */ 2885 void __virtio_unbreak_device(struct virtio_device *dev) 2886 { 2887 struct virtqueue *_vq; 2888 2889 spin_lock(&dev->vqs_list_lock); 2890 list_for_each_entry(_vq, &dev->vqs, list) { 2891 struct vring_virtqueue *vq = to_vvq(_vq); 2892 2893 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2894 WRITE_ONCE(vq->broken, false); 2895 } 2896 spin_unlock(&dev->vqs_list_lock); 2897 } 2898 EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 2899 2900 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) 2901 { 2902 const struct vring_virtqueue *vq = to_vvq(_vq); 2903 2904 BUG_ON(!vq->we_own_ring); 2905 2906 if (vq->packed_ring) 2907 return vq->packed.ring_dma_addr; 2908 2909 return vq->split.queue_dma_addr; 2910 } 2911 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2912 2913 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) 2914 { 2915 const struct vring_virtqueue *vq = to_vvq(_vq); 2916 2917 BUG_ON(!vq->we_own_ring); 2918 2919 if (vq->packed_ring) 2920 return vq->packed.driver_event_dma_addr; 2921 2922 return vq->split.queue_dma_addr + 2923 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2924 } 2925 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2926 2927 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) 2928 { 2929 const struct vring_virtqueue *vq = to_vvq(_vq); 2930 2931 BUG_ON(!vq->we_own_ring); 2932 2933 if (vq->packed_ring) 2934 return vq->packed.device_event_dma_addr; 2935 2936 return vq->split.queue_dma_addr + 2937 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2938 } 2939 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2940 2941 /* Only available for split ring */ 2942 const struct vring *virtqueue_get_vring(const struct virtqueue *vq) 2943 { 2944 return &to_vvq(vq)->split.vring; 2945 } 2946 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2947 2948 MODULE_LICENSE("GPL"); 2949