1 /* Virtio ring implementation. 2 * 3 * Copyright 2007 Rusty Russell IBM Corporation 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19 #include <linux/virtio.h> 20 #include <linux/virtio_ring.h> 21 #include <linux/virtio_config.h> 22 #include <linux/device.h> 23 #include <linux/slab.h> 24 #include <linux/module.h> 25 #include <linux/hrtimer.h> 26 #include <linux/dma-mapping.h> 27 #include <xen/xen.h> 28 29 #ifdef DEBUG 30 /* For development, we want to crash whenever the ring is screwed. */ 31 #define BAD_RING(_vq, fmt, args...) \ 32 do { \ 33 dev_err(&(_vq)->vq.vdev->dev, \ 34 "%s:"fmt, (_vq)->vq.name, ##args); \ 35 BUG(); \ 36 } while (0) 37 /* Caller is supposed to guarantee no reentry. */ 38 #define START_USE(_vq) \ 39 do { \ 40 if ((_vq)->in_use) \ 41 panic("%s:in_use = %i\n", \ 42 (_vq)->vq.name, (_vq)->in_use); \ 43 (_vq)->in_use = __LINE__; \ 44 } while (0) 45 #define END_USE(_vq) \ 46 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 47 #define LAST_ADD_TIME_UPDATE(_vq) \ 48 do { \ 49 ktime_t now = ktime_get(); \ 50 \ 51 /* No kick or get, with .1 second between? Warn. */ \ 52 if ((_vq)->last_add_time_valid) \ 53 WARN_ON(ktime_to_ms(ktime_sub(now, \ 54 (_vq)->last_add_time)) > 100); \ 55 (_vq)->last_add_time = now; \ 56 (_vq)->last_add_time_valid = true; \ 57 } while (0) 58 #define LAST_ADD_TIME_CHECK(_vq) \ 59 do { \ 60 if ((_vq)->last_add_time_valid) { \ 61 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 62 (_vq)->last_add_time)) > 100); \ 63 } \ 64 } while (0) 65 #define LAST_ADD_TIME_INVALID(_vq) \ 66 ((_vq)->last_add_time_valid = false) 67 #else 68 #define BAD_RING(_vq, fmt, args...) \ 69 do { \ 70 dev_err(&_vq->vq.vdev->dev, \ 71 "%s:"fmt, (_vq)->vq.name, ##args); \ 72 (_vq)->broken = true; \ 73 } while (0) 74 #define START_USE(vq) 75 #define END_USE(vq) 76 #define LAST_ADD_TIME_UPDATE(vq) 77 #define LAST_ADD_TIME_CHECK(vq) 78 #define LAST_ADD_TIME_INVALID(vq) 79 #endif 80 81 struct vring_desc_state_split { 82 void *data; /* Data for callback. */ 83 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 84 }; 85 86 struct vring_desc_state_packed { 87 void *data; /* Data for callback. */ 88 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 89 u16 num; /* Descriptor list length. */ 90 u16 next; /* The next desc state in a list. */ 91 u16 last; /* The last desc state in a list. */ 92 }; 93 94 struct vring_desc_extra_packed { 95 dma_addr_t addr; /* Buffer DMA addr. */ 96 u32 len; /* Buffer length. */ 97 u16 flags; /* Descriptor flags. */ 98 }; 99 100 struct vring_virtqueue { 101 struct virtqueue vq; 102 103 /* Is this a packed ring? */ 104 bool packed_ring; 105 106 /* Is DMA API used? */ 107 bool use_dma_api; 108 109 /* Can we use weak barriers? */ 110 bool weak_barriers; 111 112 /* Other side has made a mess, don't try any more. 
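	 * Once set (by BAD_RING() or a failed notify), the ring is treated
	 * as unusable: further adds fail with -EIO, virtqueue_notify()
	 * returns false and virtqueue_get_buf() returns NULL.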
*/ 113 bool broken; 114 115 /* Host supports indirect buffers */ 116 bool indirect; 117 118 /* Host publishes avail event idx */ 119 bool event; 120 121 /* Head of free buffer list. */ 122 unsigned int free_head; 123 /* Number we've added since last sync. */ 124 unsigned int num_added; 125 126 /* Last used index we've seen. */ 127 u16 last_used_idx; 128 129 union { 130 /* Available for split ring */ 131 struct { 132 /* Actual memory layout for this queue. */ 133 struct vring vring; 134 135 /* Last written value to avail->flags */ 136 u16 avail_flags_shadow; 137 138 /* 139 * Last written value to avail->idx in 140 * guest byte order. 141 */ 142 u16 avail_idx_shadow; 143 144 /* Per-descriptor state. */ 145 struct vring_desc_state_split *desc_state; 146 147 /* DMA address and size information */ 148 dma_addr_t queue_dma_addr; 149 size_t queue_size_in_bytes; 150 } split; 151 152 /* Available for packed ring */ 153 struct { 154 /* Actual memory layout for this queue. */ 155 struct { 156 unsigned int num; 157 struct vring_packed_desc *desc; 158 struct vring_packed_desc_event *driver; 159 struct vring_packed_desc_event *device; 160 } vring; 161 162 /* Driver ring wrap counter. */ 163 bool avail_wrap_counter; 164 165 /* Device ring wrap counter. */ 166 bool used_wrap_counter; 167 168 /* Avail used flags. */ 169 u16 avail_used_flags; 170 171 /* Index of the next avail descriptor. */ 172 u16 next_avail_idx; 173 174 /* 175 * Last written value to driver->flags in 176 * guest byte order. 177 */ 178 u16 event_flags_shadow; 179 180 /* Per-descriptor state. */ 181 struct vring_desc_state_packed *desc_state; 182 struct vring_desc_extra_packed *desc_extra; 183 184 /* DMA address and size information */ 185 dma_addr_t ring_dma_addr; 186 dma_addr_t driver_event_dma_addr; 187 dma_addr_t device_event_dma_addr; 188 size_t ring_size_in_bytes; 189 size_t event_size_in_bytes; 190 } packed; 191 }; 192 193 /* How to notify other side. FIXME: commonalize hcalls! */ 194 bool (*notify)(struct virtqueue *vq); 195 196 /* DMA, allocation, and size information */ 197 bool we_own_ring; 198 199 #ifdef DEBUG 200 /* They're supposed to lock for us. */ 201 unsigned int in_use; 202 203 /* Figure out if their kicks are too delayed. */ 204 bool last_add_time_valid; 205 ktime_t last_add_time; 206 #endif 207 }; 208 209 210 /* 211 * Helpers. 212 */ 213 214 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 215 216 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 217 unsigned int total_sg) 218 { 219 struct vring_virtqueue *vq = to_vvq(_vq); 220 221 /* 222 * If the host supports indirect descriptor tables, and we have multiple 223 * buffers, then go indirect. FIXME: tune this threshold 224 */ 225 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 226 } 227 228 /* 229 * Modern virtio devices have feature bits to specify whether they need a 230 * quirk and bypass the IOMMU. If not there, just use the DMA API. 231 * 232 * If there, the interaction between virtio and DMA API is messy. 233 * 234 * On most systems with virtio, physical addresses match bus addresses, 235 * and it doesn't particularly matter whether we use the DMA API. 236 * 237 * On some systems, including Xen and any system with a physical device 238 * that speaks virtio behind a physical IOMMU, we must use the DMA API 239 * for virtio DMA to work at all. 
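 * (Xen guests are one example: a guest-physical address is not in
 * general a usable bus address there, so the translation done by the
 * DMA API is required.)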
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_iommu_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time. On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge. Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(&vdev->dev);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address. The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine. Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess. For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry.
*/ 338 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 339 struct scatterlist *sg, 340 enum dma_data_direction direction) 341 { 342 if (!vq->use_dma_api) 343 return (dma_addr_t)sg_phys(sg); 344 345 /* 346 * We can't use dma_map_sg, because we don't use scatterlists in 347 * the way it expects (we don't guarantee that the scatterlist 348 * will exist for the lifetime of the mapping). 349 */ 350 return dma_map_page(vring_dma_dev(vq), 351 sg_page(sg), sg->offset, sg->length, 352 direction); 353 } 354 355 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 356 void *cpu_addr, size_t size, 357 enum dma_data_direction direction) 358 { 359 if (!vq->use_dma_api) 360 return (dma_addr_t)virt_to_phys(cpu_addr); 361 362 return dma_map_single(vring_dma_dev(vq), 363 cpu_addr, size, direction); 364 } 365 366 static int vring_mapping_error(const struct vring_virtqueue *vq, 367 dma_addr_t addr) 368 { 369 if (!vq->use_dma_api) 370 return 0; 371 372 return dma_mapping_error(vring_dma_dev(vq), addr); 373 } 374 375 376 /* 377 * Split ring specific functions - *_split(). 378 */ 379 380 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 381 struct vring_desc *desc) 382 { 383 u16 flags; 384 385 if (!vq->use_dma_api) 386 return; 387 388 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 389 390 if (flags & VRING_DESC_F_INDIRECT) { 391 dma_unmap_single(vring_dma_dev(vq), 392 virtio64_to_cpu(vq->vq.vdev, desc->addr), 393 virtio32_to_cpu(vq->vq.vdev, desc->len), 394 (flags & VRING_DESC_F_WRITE) ? 395 DMA_FROM_DEVICE : DMA_TO_DEVICE); 396 } else { 397 dma_unmap_page(vring_dma_dev(vq), 398 virtio64_to_cpu(vq->vq.vdev, desc->addr), 399 virtio32_to_cpu(vq->vq.vdev, desc->len), 400 (flags & VRING_DESC_F_WRITE) ? 401 DMA_FROM_DEVICE : DMA_TO_DEVICE); 402 } 403 } 404 405 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 406 unsigned int total_sg, 407 gfp_t gfp) 408 { 409 struct vring_desc *desc; 410 unsigned int i; 411 412 /* 413 * We require lowmem mappings for the descriptors because 414 * otherwise virt_to_phys will give us bogus addresses in the 415 * virtqueue. 416 */ 417 gfp &= ~__GFP_HIGHMEM; 418 419 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 420 if (!desc) 421 return NULL; 422 423 for (i = 0; i < total_sg; i++) 424 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 425 return desc; 426 } 427 428 static inline int virtqueue_add_split(struct virtqueue *_vq, 429 struct scatterlist *sgs[], 430 unsigned int total_sg, 431 unsigned int out_sgs, 432 unsigned int in_sgs, 433 void *data, 434 void *ctx, 435 gfp_t gfp) 436 { 437 struct vring_virtqueue *vq = to_vvq(_vq); 438 struct scatterlist *sg; 439 struct vring_desc *desc; 440 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 441 int head; 442 bool indirect; 443 444 START_USE(vq); 445 446 BUG_ON(data == NULL); 447 BUG_ON(ctx && vq->indirect); 448 449 if (unlikely(vq->broken)) { 450 END_USE(vq); 451 return -EIO; 452 } 453 454 LAST_ADD_TIME_UPDATE(vq); 455 456 BUG_ON(total_sg == 0); 457 458 head = vq->free_head; 459 460 if (virtqueue_use_indirect(_vq, total_sg)) 461 desc = alloc_indirect_split(_vq, total_sg, gfp); 462 else { 463 desc = NULL; 464 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 465 } 466 467 if (desc) { 468 /* Use a single buffer which doesn't continue */ 469 indirect = true; 470 /* Set up rest to use this indirect table. 
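		 * A single slot in the main ring then describes the whole
		 * request:
		 *
		 *	vring.desc[head].addr  = DMA address of this table
		 *	vring.desc[head].len   = total_sg * sizeof(struct vring_desc)
		 *	vring.desc[head].flags = VRING_DESC_F_INDIRECT
		 *
		 * while the table itself carries one descriptor per sg entry,
		 * chained with VRING_DESC_F_NEXT.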
*/ 471 i = 0; 472 descs_used = 1; 473 } else { 474 indirect = false; 475 desc = vq->split.vring.desc; 476 i = head; 477 descs_used = total_sg; 478 } 479 480 if (vq->vq.num_free < descs_used) { 481 pr_debug("Can't add buf len %i - avail = %i\n", 482 descs_used, vq->vq.num_free); 483 /* FIXME: for historical reasons, we force a notify here if 484 * there are outgoing parts to the buffer. Presumably the 485 * host should service the ring ASAP. */ 486 if (out_sgs) 487 vq->notify(&vq->vq); 488 if (indirect) 489 kfree(desc); 490 END_USE(vq); 491 return -ENOSPC; 492 } 493 494 for (n = 0; n < out_sgs; n++) { 495 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 496 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 497 if (vring_mapping_error(vq, addr)) 498 goto unmap_release; 499 500 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 501 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 502 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 503 prev = i; 504 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 505 } 506 } 507 for (; n < (out_sgs + in_sgs); n++) { 508 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 509 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 510 if (vring_mapping_error(vq, addr)) 511 goto unmap_release; 512 513 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 514 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 515 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 516 prev = i; 517 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 518 } 519 } 520 /* Last one doesn't continue. */ 521 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 522 523 if (indirect) { 524 /* Now that the indirect table is filled in, map it. */ 525 dma_addr_t addr = vring_map_single( 526 vq, desc, total_sg * sizeof(struct vring_desc), 527 DMA_TO_DEVICE); 528 if (vring_mapping_error(vq, addr)) 529 goto unmap_release; 530 531 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 532 VRING_DESC_F_INDIRECT); 533 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 534 addr); 535 536 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 537 total_sg * sizeof(struct vring_desc)); 538 } 539 540 /* We're using some buffers from the free list. */ 541 vq->vq.num_free -= descs_used; 542 543 /* Update free pointer */ 544 if (indirect) 545 vq->free_head = virtio16_to_cpu(_vq->vdev, 546 vq->split.vring.desc[head].next); 547 else 548 vq->free_head = i; 549 550 /* Store token and indirect buffer state. */ 551 vq->split.desc_state[head].data = data; 552 if (indirect) 553 vq->split.desc_state[head].indir_desc = desc; 554 else 555 vq->split.desc_state[head].indir_desc = ctx; 556 557 /* Put entry in available array (but don't update avail->idx until they 558 * do sync). */ 559 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 560 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 561 562 /* Descriptors and available array need to be set before we expose the 563 * new available array entries. */ 564 virtio_wmb(vq->weak_barriers); 565 vq->split.avail_idx_shadow++; 566 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 567 vq->split.avail_idx_shadow); 568 vq->num_added++; 569 570 pr_debug("Added buffer head %i to %p\n", head, vq); 571 END_USE(vq); 572 573 /* This is very unlikely, but theoretically possible. Kick 574 * just in case. 
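	 *
	 * For reference, the usual producer-side pattern built on this
	 * function is (sketch only; buf/len are whatever the driver is
	 * sending):
	 *
	 *	struct scatterlist sg;
	 *
	 *	sg_init_one(&sg, buf, len);
	 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
	 *		virtqueue_kick(vq);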
*/ 575 if (unlikely(vq->num_added == (1 << 16) - 1)) 576 virtqueue_kick(_vq); 577 578 return 0; 579 580 unmap_release: 581 err_idx = i; 582 i = head; 583 584 for (n = 0; n < total_sg; n++) { 585 if (i == err_idx) 586 break; 587 vring_unmap_one_split(vq, &desc[i]); 588 i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 589 } 590 591 if (indirect) 592 kfree(desc); 593 594 END_USE(vq); 595 return -EIO; 596 } 597 598 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 599 { 600 struct vring_virtqueue *vq = to_vvq(_vq); 601 u16 new, old; 602 bool needs_kick; 603 604 START_USE(vq); 605 /* We need to expose available array entries before checking avail 606 * event. */ 607 virtio_mb(vq->weak_barriers); 608 609 old = vq->split.avail_idx_shadow - vq->num_added; 610 new = vq->split.avail_idx_shadow; 611 vq->num_added = 0; 612 613 LAST_ADD_TIME_CHECK(vq); 614 LAST_ADD_TIME_INVALID(vq); 615 616 if (vq->event) { 617 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 618 vring_avail_event(&vq->split.vring)), 619 new, old); 620 } else { 621 needs_kick = !(vq->split.vring.used->flags & 622 cpu_to_virtio16(_vq->vdev, 623 VRING_USED_F_NO_NOTIFY)); 624 } 625 END_USE(vq); 626 return needs_kick; 627 } 628 629 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 630 void **ctx) 631 { 632 unsigned int i, j; 633 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 634 635 /* Clear data ptr. */ 636 vq->split.desc_state[head].data = NULL; 637 638 /* Put back on free list: unmap first-level descriptors and find end */ 639 i = head; 640 641 while (vq->split.vring.desc[i].flags & nextflag) { 642 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 643 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 644 vq->vq.num_free++; 645 } 646 647 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 648 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 649 vq->free_head); 650 vq->free_head = head; 651 652 /* Plus final descriptor */ 653 vq->vq.num_free++; 654 655 if (vq->indirect) { 656 struct vring_desc *indir_desc = 657 vq->split.desc_state[head].indir_desc; 658 u32 len; 659 660 /* Free the indirect table, if any, now that it's unmapped. */ 661 if (!indir_desc) 662 return; 663 664 len = virtio32_to_cpu(vq->vq.vdev, 665 vq->split.vring.desc[head].len); 666 667 BUG_ON(!(vq->split.vring.desc[head].flags & 668 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 669 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 670 671 for (j = 0; j < len / sizeof(struct vring_desc); j++) 672 vring_unmap_one_split(vq, &indir_desc[j]); 673 674 kfree(indir_desc); 675 vq->split.desc_state[head].indir_desc = NULL; 676 } else if (ctx) { 677 *ctx = vq->split.desc_state[head].indir_desc; 678 } 679 } 680 681 static inline bool more_used_split(const struct vring_virtqueue *vq) 682 { 683 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 684 vq->split.vring.used->idx); 685 } 686 687 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 688 unsigned int *len, 689 void **ctx) 690 { 691 struct vring_virtqueue *vq = to_vvq(_vq); 692 void *ret; 693 unsigned int i; 694 u16 last_used; 695 696 START_USE(vq); 697 698 if (unlikely(vq->broken)) { 699 END_USE(vq); 700 return NULL; 701 } 702 703 if (!more_used_split(vq)) { 704 pr_debug("No more buffers in queue\n"); 705 END_USE(vq); 706 return NULL; 707 } 708 709 /* Only get used array entries after they have been exposed by host. 
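	 *
	 * A typical consumer drains the queue from its callback like this
	 * (sketch; consume() stands for the driver's completion handling):
	 *
	 *	unsigned int len;
	 *	void *buf;
	 *
	 *	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
	 *		consume(buf, len);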
*/ 710 virtio_rmb(vq->weak_barriers); 711 712 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 713 i = virtio32_to_cpu(_vq->vdev, 714 vq->split.vring.used->ring[last_used].id); 715 *len = virtio32_to_cpu(_vq->vdev, 716 vq->split.vring.used->ring[last_used].len); 717 718 if (unlikely(i >= vq->split.vring.num)) { 719 BAD_RING(vq, "id %u out of range\n", i); 720 return NULL; 721 } 722 if (unlikely(!vq->split.desc_state[i].data)) { 723 BAD_RING(vq, "id %u is not a head!\n", i); 724 return NULL; 725 } 726 727 /* detach_buf_split clears data, so grab it now. */ 728 ret = vq->split.desc_state[i].data; 729 detach_buf_split(vq, i, ctx); 730 vq->last_used_idx++; 731 /* If we expect an interrupt for the next entry, tell host 732 * by writing event index and flush out the write before 733 * the read in the next get_buf call. */ 734 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 735 virtio_store_mb(vq->weak_barriers, 736 &vring_used_event(&vq->split.vring), 737 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 738 739 LAST_ADD_TIME_INVALID(vq); 740 741 END_USE(vq); 742 return ret; 743 } 744 745 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 746 { 747 struct vring_virtqueue *vq = to_vvq(_vq); 748 749 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 750 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 751 if (!vq->event) 752 vq->split.vring.avail->flags = 753 cpu_to_virtio16(_vq->vdev, 754 vq->split.avail_flags_shadow); 755 } 756 } 757 758 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 759 { 760 struct vring_virtqueue *vq = to_vvq(_vq); 761 u16 last_used_idx; 762 763 START_USE(vq); 764 765 /* We optimistically turn back on interrupts, then check if there was 766 * more to do. */ 767 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 768 * either clear the flags bit or point the event index at the next 769 * entry. Always do both to keep code simple. */ 770 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 771 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 772 if (!vq->event) 773 vq->split.vring.avail->flags = 774 cpu_to_virtio16(_vq->vdev, 775 vq->split.avail_flags_shadow); 776 } 777 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 778 last_used_idx = vq->last_used_idx); 779 END_USE(vq); 780 return last_used_idx; 781 } 782 783 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 784 { 785 struct vring_virtqueue *vq = to_vvq(_vq); 786 787 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 788 vq->split.vring.used->idx); 789 } 790 791 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 792 { 793 struct vring_virtqueue *vq = to_vvq(_vq); 794 u16 bufs; 795 796 START_USE(vq); 797 798 /* We optimistically turn back on interrupts, then check if there was 799 * more to do. */ 800 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 801 * either clear the flags bit or point the event index at the next 802 * entry. Always update the event index to keep code simple. 
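	 *
	 * The threshold below delays the interrupt until roughly three
	 * quarters of the outstanding buffers have been consumed: with 100
	 * buffers in flight, bufs = 75 and used_event is set to
	 * last_used_idx + 75, so the device only signals after it has used
	 * more than 75 of them.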
*/ 803 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 804 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 805 if (!vq->event) 806 vq->split.vring.avail->flags = 807 cpu_to_virtio16(_vq->vdev, 808 vq->split.avail_flags_shadow); 809 } 810 /* TODO: tune this threshold */ 811 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 812 813 virtio_store_mb(vq->weak_barriers, 814 &vring_used_event(&vq->split.vring), 815 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 816 817 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 818 - vq->last_used_idx) > bufs)) { 819 END_USE(vq); 820 return false; 821 } 822 823 END_USE(vq); 824 return true; 825 } 826 827 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 828 { 829 struct vring_virtqueue *vq = to_vvq(_vq); 830 unsigned int i; 831 void *buf; 832 833 START_USE(vq); 834 835 for (i = 0; i < vq->split.vring.num; i++) { 836 if (!vq->split.desc_state[i].data) 837 continue; 838 /* detach_buf_split clears data, so grab it now. */ 839 buf = vq->split.desc_state[i].data; 840 detach_buf_split(vq, i, NULL); 841 vq->split.avail_idx_shadow--; 842 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 843 vq->split.avail_idx_shadow); 844 END_USE(vq); 845 return buf; 846 } 847 /* That should have freed everything. */ 848 BUG_ON(vq->vq.num_free != vq->split.vring.num); 849 850 END_USE(vq); 851 return NULL; 852 } 853 854 static struct virtqueue *vring_create_virtqueue_split( 855 unsigned int index, 856 unsigned int num, 857 unsigned int vring_align, 858 struct virtio_device *vdev, 859 bool weak_barriers, 860 bool may_reduce_num, 861 bool context, 862 bool (*notify)(struct virtqueue *), 863 void (*callback)(struct virtqueue *), 864 const char *name) 865 { 866 struct virtqueue *vq; 867 void *queue = NULL; 868 dma_addr_t dma_addr; 869 size_t queue_size_in_bytes; 870 struct vring vring; 871 872 /* We assume num is a power of 2. */ 873 if (num & (num - 1)) { 874 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 875 return NULL; 876 } 877 878 /* TODO: allocate each queue chunk individually */ 879 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 880 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 881 &dma_addr, 882 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 883 if (queue) 884 break; 885 } 886 887 if (!num) 888 return NULL; 889 890 if (!queue) { 891 /* Try to get a single page. You are my only hope! */ 892 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 893 &dma_addr, GFP_KERNEL|__GFP_ZERO); 894 } 895 if (!queue) 896 return NULL; 897 898 queue_size_in_bytes = vring_size(num, vring_align); 899 vring_init(&vring, num, queue, vring_align); 900 901 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 902 notify, callback, name); 903 if (!vq) { 904 vring_free_queue(vdev, queue_size_in_bytes, queue, 905 dma_addr); 906 return NULL; 907 } 908 909 to_vvq(vq)->split.queue_dma_addr = dma_addr; 910 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 911 to_vvq(vq)->we_own_ring = true; 912 913 return vq; 914 } 915 916 917 /* 918 * Packed ring specific functions - *_packed(). 
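 *
 * A packed ring uses a single descriptor array for both available and
 * used entries. Driver and device each keep a wrap counter, and a
 * slot's AVAIL/USED flag bits are interpreted relative to that counter
 * (see is_used_desc_packed()). Buffers are identified by a 16-bit id
 * and may be used out of order, which is why per-id state lives in
 * desc_state[]/desc_extra[] rather than in the ring itself.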
919 */ 920 921 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 922 struct vring_desc_extra_packed *state) 923 { 924 u16 flags; 925 926 if (!vq->use_dma_api) 927 return; 928 929 flags = state->flags; 930 931 if (flags & VRING_DESC_F_INDIRECT) { 932 dma_unmap_single(vring_dma_dev(vq), 933 state->addr, state->len, 934 (flags & VRING_DESC_F_WRITE) ? 935 DMA_FROM_DEVICE : DMA_TO_DEVICE); 936 } else { 937 dma_unmap_page(vring_dma_dev(vq), 938 state->addr, state->len, 939 (flags & VRING_DESC_F_WRITE) ? 940 DMA_FROM_DEVICE : DMA_TO_DEVICE); 941 } 942 } 943 944 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 945 struct vring_packed_desc *desc) 946 { 947 u16 flags; 948 949 if (!vq->use_dma_api) 950 return; 951 952 flags = le16_to_cpu(desc->flags); 953 954 if (flags & VRING_DESC_F_INDIRECT) { 955 dma_unmap_single(vring_dma_dev(vq), 956 le64_to_cpu(desc->addr), 957 le32_to_cpu(desc->len), 958 (flags & VRING_DESC_F_WRITE) ? 959 DMA_FROM_DEVICE : DMA_TO_DEVICE); 960 } else { 961 dma_unmap_page(vring_dma_dev(vq), 962 le64_to_cpu(desc->addr), 963 le32_to_cpu(desc->len), 964 (flags & VRING_DESC_F_WRITE) ? 965 DMA_FROM_DEVICE : DMA_TO_DEVICE); 966 } 967 } 968 969 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 970 gfp_t gfp) 971 { 972 struct vring_packed_desc *desc; 973 974 /* 975 * We require lowmem mappings for the descriptors because 976 * otherwise virt_to_phys will give us bogus addresses in the 977 * virtqueue. 978 */ 979 gfp &= ~__GFP_HIGHMEM; 980 981 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 982 983 return desc; 984 } 985 986 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 987 struct scatterlist *sgs[], 988 unsigned int total_sg, 989 unsigned int out_sgs, 990 unsigned int in_sgs, 991 void *data, 992 gfp_t gfp) 993 { 994 struct vring_packed_desc *desc; 995 struct scatterlist *sg; 996 unsigned int i, n, err_idx; 997 u16 head, id; 998 dma_addr_t addr; 999 1000 head = vq->packed.next_avail_idx; 1001 desc = alloc_indirect_packed(total_sg, gfp); 1002 1003 if (unlikely(vq->vq.num_free < 1)) { 1004 pr_debug("Can't add buf len 1 - avail = 0\n"); 1005 END_USE(vq); 1006 return -ENOSPC; 1007 } 1008 1009 i = 0; 1010 id = vq->free_head; 1011 BUG_ON(id == vq->packed.vring.num); 1012 1013 for (n = 0; n < out_sgs + in_sgs; n++) { 1014 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1015 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1016 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1017 if (vring_mapping_error(vq, addr)) 1018 goto unmap_release; 1019 1020 desc[i].flags = cpu_to_le16(n < out_sgs ? 1021 0 : VRING_DESC_F_WRITE); 1022 desc[i].addr = cpu_to_le64(addr); 1023 desc[i].len = cpu_to_le32(sg->length); 1024 i++; 1025 } 1026 } 1027 1028 /* Now that the indirect table is filled in, map it. 
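	 * As in the split case, the whole request then occupies a single
	 * ring slot: the head descriptor points at the table, and num_free
	 * is charged one slot no matter how many sg entries the request has.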
*/ 1029 addr = vring_map_single(vq, desc, 1030 total_sg * sizeof(struct vring_packed_desc), 1031 DMA_TO_DEVICE); 1032 if (vring_mapping_error(vq, addr)) 1033 goto unmap_release; 1034 1035 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1036 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1037 sizeof(struct vring_packed_desc)); 1038 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1039 1040 if (vq->use_dma_api) { 1041 vq->packed.desc_extra[id].addr = addr; 1042 vq->packed.desc_extra[id].len = total_sg * 1043 sizeof(struct vring_packed_desc); 1044 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1045 vq->packed.avail_used_flags; 1046 } 1047 1048 /* 1049 * A driver MUST NOT make the first descriptor in the list 1050 * available before all subsequent descriptors comprising 1051 * the list are made available. 1052 */ 1053 virtio_wmb(vq->weak_barriers); 1054 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1055 vq->packed.avail_used_flags); 1056 1057 /* We're using some buffers from the free list. */ 1058 vq->vq.num_free -= 1; 1059 1060 /* Update free pointer */ 1061 n = head + 1; 1062 if (n >= vq->packed.vring.num) { 1063 n = 0; 1064 vq->packed.avail_wrap_counter ^= 1; 1065 vq->packed.avail_used_flags ^= 1066 1 << VRING_PACKED_DESC_F_AVAIL | 1067 1 << VRING_PACKED_DESC_F_USED; 1068 } 1069 vq->packed.next_avail_idx = n; 1070 vq->free_head = vq->packed.desc_state[id].next; 1071 1072 /* Store token and indirect buffer state. */ 1073 vq->packed.desc_state[id].num = 1; 1074 vq->packed.desc_state[id].data = data; 1075 vq->packed.desc_state[id].indir_desc = desc; 1076 vq->packed.desc_state[id].last = id; 1077 1078 vq->num_added += 1; 1079 1080 pr_debug("Added buffer head %i to %p\n", head, vq); 1081 END_USE(vq); 1082 1083 return 0; 1084 1085 unmap_release: 1086 err_idx = i; 1087 1088 for (i = 0; i < err_idx; i++) 1089 vring_unmap_desc_packed(vq, &desc[i]); 1090 1091 kfree(desc); 1092 1093 END_USE(vq); 1094 return -EIO; 1095 } 1096 1097 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1098 struct scatterlist *sgs[], 1099 unsigned int total_sg, 1100 unsigned int out_sgs, 1101 unsigned int in_sgs, 1102 void *data, 1103 void *ctx, 1104 gfp_t gfp) 1105 { 1106 struct vring_virtqueue *vq = to_vvq(_vq); 1107 struct vring_packed_desc *desc; 1108 struct scatterlist *sg; 1109 unsigned int i, n, c, descs_used, err_idx; 1110 __le16 uninitialized_var(head_flags), flags; 1111 u16 head, id, uninitialized_var(prev), curr, avail_used_flags; 1112 1113 START_USE(vq); 1114 1115 BUG_ON(data == NULL); 1116 BUG_ON(ctx && vq->indirect); 1117 1118 if (unlikely(vq->broken)) { 1119 END_USE(vq); 1120 return -EIO; 1121 } 1122 1123 LAST_ADD_TIME_UPDATE(vq); 1124 1125 BUG_ON(total_sg == 0); 1126 1127 if (virtqueue_use_indirect(_vq, total_sg)) 1128 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1129 out_sgs, in_sgs, data, gfp); 1130 1131 head = vq->packed.next_avail_idx; 1132 avail_used_flags = vq->packed.avail_used_flags; 1133 1134 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1135 1136 desc = vq->packed.vring.desc; 1137 i = head; 1138 descs_used = total_sg; 1139 1140 if (unlikely(vq->vq.num_free < descs_used)) { 1141 pr_debug("Can't add buf len %i - avail = %i\n", 1142 descs_used, vq->vq.num_free); 1143 END_USE(vq); 1144 return -ENOSPC; 1145 } 1146 1147 id = vq->free_head; 1148 BUG_ON(id == vq->packed.vring.num); 1149 1150 curr = id; 1151 c = 0; 1152 for (n = 0; n < out_sgs + in_sgs; n++) { 1153 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1154 
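			/*
			 * Map each sg entry and fill one descriptor per entry.
			 * The head descriptor's flags are kept aside in
			 * head_flags and only stored after the write barrier
			 * below, so the device never sees a partially
			 * populated chain.
			 */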
dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1155 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1156 if (vring_mapping_error(vq, addr)) 1157 goto unmap_release; 1158 1159 flags = cpu_to_le16(vq->packed.avail_used_flags | 1160 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1161 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1162 if (i == head) 1163 head_flags = flags; 1164 else 1165 desc[i].flags = flags; 1166 1167 desc[i].addr = cpu_to_le64(addr); 1168 desc[i].len = cpu_to_le32(sg->length); 1169 desc[i].id = cpu_to_le16(id); 1170 1171 if (unlikely(vq->use_dma_api)) { 1172 vq->packed.desc_extra[curr].addr = addr; 1173 vq->packed.desc_extra[curr].len = sg->length; 1174 vq->packed.desc_extra[curr].flags = 1175 le16_to_cpu(flags); 1176 } 1177 prev = curr; 1178 curr = vq->packed.desc_state[curr].next; 1179 1180 if ((unlikely(++i >= vq->packed.vring.num))) { 1181 i = 0; 1182 vq->packed.avail_used_flags ^= 1183 1 << VRING_PACKED_DESC_F_AVAIL | 1184 1 << VRING_PACKED_DESC_F_USED; 1185 } 1186 } 1187 } 1188 1189 if (i < head) 1190 vq->packed.avail_wrap_counter ^= 1; 1191 1192 /* We're using some buffers from the free list. */ 1193 vq->vq.num_free -= descs_used; 1194 1195 /* Update free pointer */ 1196 vq->packed.next_avail_idx = i; 1197 vq->free_head = curr; 1198 1199 /* Store token. */ 1200 vq->packed.desc_state[id].num = descs_used; 1201 vq->packed.desc_state[id].data = data; 1202 vq->packed.desc_state[id].indir_desc = ctx; 1203 vq->packed.desc_state[id].last = prev; 1204 1205 /* 1206 * A driver MUST NOT make the first descriptor in the list 1207 * available before all subsequent descriptors comprising 1208 * the list are made available. 1209 */ 1210 virtio_wmb(vq->weak_barriers); 1211 vq->packed.vring.desc[head].flags = head_flags; 1212 vq->num_added += descs_used; 1213 1214 pr_debug("Added buffer head %i to %p\n", head, vq); 1215 END_USE(vq); 1216 1217 return 0; 1218 1219 unmap_release: 1220 err_idx = i; 1221 i = head; 1222 1223 vq->packed.avail_used_flags = avail_used_flags; 1224 1225 for (n = 0; n < total_sg; n++) { 1226 if (i == err_idx) 1227 break; 1228 vring_unmap_desc_packed(vq, &desc[i]); 1229 i++; 1230 if (i >= vq->packed.vring.num) 1231 i = 0; 1232 } 1233 1234 END_USE(vq); 1235 return -EIO; 1236 } 1237 1238 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1239 { 1240 struct vring_virtqueue *vq = to_vvq(_vq); 1241 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1242 bool needs_kick; 1243 union { 1244 struct { 1245 __le16 off_wrap; 1246 __le16 flags; 1247 }; 1248 u32 u32; 1249 } snapshot; 1250 1251 START_USE(vq); 1252 1253 /* 1254 * We need to expose the new flags value before checking notification 1255 * suppressions. 
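	 *
	 * The device's event suppression area (off_wrap + flags) is then
	 * read as one 32-bit load via the snapshot union so that the two
	 * 16-bit fields are observed consistently.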
1256 */ 1257 virtio_mb(vq->weak_barriers); 1258 1259 old = vq->packed.next_avail_idx - vq->num_added; 1260 new = vq->packed.next_avail_idx; 1261 vq->num_added = 0; 1262 1263 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1264 flags = le16_to_cpu(snapshot.flags); 1265 1266 LAST_ADD_TIME_CHECK(vq); 1267 LAST_ADD_TIME_INVALID(vq); 1268 1269 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1270 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1271 goto out; 1272 } 1273 1274 off_wrap = le16_to_cpu(snapshot.off_wrap); 1275 1276 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1277 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1278 if (wrap_counter != vq->packed.avail_wrap_counter) 1279 event_idx -= vq->packed.vring.num; 1280 1281 needs_kick = vring_need_event(event_idx, new, old); 1282 out: 1283 END_USE(vq); 1284 return needs_kick; 1285 } 1286 1287 static void detach_buf_packed(struct vring_virtqueue *vq, 1288 unsigned int id, void **ctx) 1289 { 1290 struct vring_desc_state_packed *state = NULL; 1291 struct vring_packed_desc *desc; 1292 unsigned int i, curr; 1293 1294 state = &vq->packed.desc_state[id]; 1295 1296 /* Clear data ptr. */ 1297 state->data = NULL; 1298 1299 vq->packed.desc_state[state->last].next = vq->free_head; 1300 vq->free_head = id; 1301 vq->vq.num_free += state->num; 1302 1303 if (unlikely(vq->use_dma_api)) { 1304 curr = id; 1305 for (i = 0; i < state->num; i++) { 1306 vring_unmap_state_packed(vq, 1307 &vq->packed.desc_extra[curr]); 1308 curr = vq->packed.desc_state[curr].next; 1309 } 1310 } 1311 1312 if (vq->indirect) { 1313 u32 len; 1314 1315 /* Free the indirect table, if any, now that it's unmapped. */ 1316 desc = state->indir_desc; 1317 if (!desc) 1318 return; 1319 1320 if (vq->use_dma_api) { 1321 len = vq->packed.desc_extra[id].len; 1322 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1323 i++) 1324 vring_unmap_desc_packed(vq, &desc[i]); 1325 } 1326 kfree(desc); 1327 state->indir_desc = NULL; 1328 } else if (ctx) { 1329 *ctx = state->indir_desc; 1330 } 1331 } 1332 1333 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1334 u16 idx, bool used_wrap_counter) 1335 { 1336 bool avail, used; 1337 u16 flags; 1338 1339 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1340 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1341 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1342 1343 return avail == used && used == used_wrap_counter; 1344 } 1345 1346 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1347 { 1348 return is_used_desc_packed(vq, vq->last_used_idx, 1349 vq->packed.used_wrap_counter); 1350 } 1351 1352 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1353 unsigned int *len, 1354 void **ctx) 1355 { 1356 struct vring_virtqueue *vq = to_vvq(_vq); 1357 u16 last_used, id; 1358 void *ret; 1359 1360 START_USE(vq); 1361 1362 if (unlikely(vq->broken)) { 1363 END_USE(vq); 1364 return NULL; 1365 } 1366 1367 if (!more_used_packed(vq)) { 1368 pr_debug("No more buffers in queue\n"); 1369 END_USE(vq); 1370 return NULL; 1371 } 1372 1373 /* Only get used elements after they have been exposed by host. 
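	 * Completion is id-based: the used descriptor's id selects the
	 * desc_state[] entry, and desc_state[id].num says how many ring
	 * slots the request occupied, so buffers may complete out of order.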
*/ 1374 virtio_rmb(vq->weak_barriers); 1375 1376 last_used = vq->last_used_idx; 1377 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1378 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1379 1380 if (unlikely(id >= vq->packed.vring.num)) { 1381 BAD_RING(vq, "id %u out of range\n", id); 1382 return NULL; 1383 } 1384 if (unlikely(!vq->packed.desc_state[id].data)) { 1385 BAD_RING(vq, "id %u is not a head!\n", id); 1386 return NULL; 1387 } 1388 1389 /* detach_buf_packed clears data, so grab it now. */ 1390 ret = vq->packed.desc_state[id].data; 1391 detach_buf_packed(vq, id, ctx); 1392 1393 vq->last_used_idx += vq->packed.desc_state[id].num; 1394 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1395 vq->last_used_idx -= vq->packed.vring.num; 1396 vq->packed.used_wrap_counter ^= 1; 1397 } 1398 1399 /* 1400 * If we expect an interrupt for the next entry, tell host 1401 * by writing event index and flush out the write before 1402 * the read in the next get_buf call. 1403 */ 1404 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1405 virtio_store_mb(vq->weak_barriers, 1406 &vq->packed.vring.driver->off_wrap, 1407 cpu_to_le16(vq->last_used_idx | 1408 (vq->packed.used_wrap_counter << 1409 VRING_PACKED_EVENT_F_WRAP_CTR))); 1410 1411 LAST_ADD_TIME_INVALID(vq); 1412 1413 END_USE(vq); 1414 return ret; 1415 } 1416 1417 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1418 { 1419 struct vring_virtqueue *vq = to_vvq(_vq); 1420 1421 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1422 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1423 vq->packed.vring.driver->flags = 1424 cpu_to_le16(vq->packed.event_flags_shadow); 1425 } 1426 } 1427 1428 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1429 { 1430 struct vring_virtqueue *vq = to_vvq(_vq); 1431 1432 START_USE(vq); 1433 1434 /* 1435 * We optimistically turn back on interrupts, then check if there was 1436 * more to do. 1437 */ 1438 1439 if (vq->event) { 1440 vq->packed.vring.driver->off_wrap = 1441 cpu_to_le16(vq->last_used_idx | 1442 (vq->packed.used_wrap_counter << 1443 VRING_PACKED_EVENT_F_WRAP_CTR)); 1444 /* 1445 * We need to update event offset and event wrap 1446 * counter first before updating event flags. 1447 */ 1448 virtio_wmb(vq->weak_barriers); 1449 } 1450 1451 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1452 vq->packed.event_flags_shadow = vq->event ? 1453 VRING_PACKED_EVENT_FLAG_DESC : 1454 VRING_PACKED_EVENT_FLAG_ENABLE; 1455 vq->packed.vring.driver->flags = 1456 cpu_to_le16(vq->packed.event_flags_shadow); 1457 } 1458 1459 END_USE(vq); 1460 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1461 VRING_PACKED_EVENT_F_WRAP_CTR); 1462 } 1463 1464 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1465 { 1466 struct vring_virtqueue *vq = to_vvq(_vq); 1467 bool wrap_counter; 1468 u16 used_idx; 1469 1470 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1471 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1472 1473 return is_used_desc_packed(vq, used_idx, wrap_counter); 1474 } 1475 1476 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1477 { 1478 struct vring_virtqueue *vq = to_vvq(_vq); 1479 u16 used_idx, wrap_counter; 1480 u16 bufs; 1481 1482 START_USE(vq); 1483 1484 /* 1485 * We optimistically turn back on interrupts, then check if there was 1486 * more to do. 
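	 *
	 * A typical interrupt-driven consumer relies on exactly this
	 * re-check (sketch; consume() stands for the driver's completion
	 * work):
	 *
	 *	virtqueue_disable_cb(vq);
	 *	do {
	 *		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
	 *			consume(buf, len);
	 *	} while (!virtqueue_enable_cb(vq));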
1487 */ 1488 1489 if (vq->event) { 1490 /* TODO: tune this threshold */ 1491 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1492 wrap_counter = vq->packed.used_wrap_counter; 1493 1494 used_idx = vq->last_used_idx + bufs; 1495 if (used_idx >= vq->packed.vring.num) { 1496 used_idx -= vq->packed.vring.num; 1497 wrap_counter ^= 1; 1498 } 1499 1500 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1501 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1502 1503 /* 1504 * We need to update event offset and event wrap 1505 * counter first before updating event flags. 1506 */ 1507 virtio_wmb(vq->weak_barriers); 1508 } else { 1509 used_idx = vq->last_used_idx; 1510 wrap_counter = vq->packed.used_wrap_counter; 1511 } 1512 1513 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1514 vq->packed.event_flags_shadow = vq->event ? 1515 VRING_PACKED_EVENT_FLAG_DESC : 1516 VRING_PACKED_EVENT_FLAG_ENABLE; 1517 vq->packed.vring.driver->flags = 1518 cpu_to_le16(vq->packed.event_flags_shadow); 1519 } 1520 1521 /* 1522 * We need to update event suppression structure first 1523 * before re-checking for more used buffers. 1524 */ 1525 virtio_mb(vq->weak_barriers); 1526 1527 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1528 END_USE(vq); 1529 return false; 1530 } 1531 1532 END_USE(vq); 1533 return true; 1534 } 1535 1536 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1537 { 1538 struct vring_virtqueue *vq = to_vvq(_vq); 1539 unsigned int i; 1540 void *buf; 1541 1542 START_USE(vq); 1543 1544 for (i = 0; i < vq->packed.vring.num; i++) { 1545 if (!vq->packed.desc_state[i].data) 1546 continue; 1547 /* detach_buf clears data, so grab it now. */ 1548 buf = vq->packed.desc_state[i].data; 1549 detach_buf_packed(vq, i, NULL); 1550 END_USE(vq); 1551 return buf; 1552 } 1553 /* That should have freed everything. 
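	 *
	 * Device teardown typically loops until this returns NULL, e.g.:
	 *
	 *	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
	 *		kfree(buf);
	 *
	 * (kfree() stands in for however the driver frees its buffers.)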
*/ 1554 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1555 1556 END_USE(vq); 1557 return NULL; 1558 } 1559 1560 static struct virtqueue *vring_create_virtqueue_packed( 1561 unsigned int index, 1562 unsigned int num, 1563 unsigned int vring_align, 1564 struct virtio_device *vdev, 1565 bool weak_barriers, 1566 bool may_reduce_num, 1567 bool context, 1568 bool (*notify)(struct virtqueue *), 1569 void (*callback)(struct virtqueue *), 1570 const char *name) 1571 { 1572 struct vring_virtqueue *vq; 1573 struct vring_packed_desc *ring; 1574 struct vring_packed_desc_event *driver, *device; 1575 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1576 size_t ring_size_in_bytes, event_size_in_bytes; 1577 unsigned int i; 1578 1579 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1580 1581 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1582 &ring_dma_addr, 1583 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1584 if (!ring) 1585 goto err_ring; 1586 1587 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1588 1589 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1590 &driver_event_dma_addr, 1591 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1592 if (!driver) 1593 goto err_driver; 1594 1595 device = vring_alloc_queue(vdev, event_size_in_bytes, 1596 &device_event_dma_addr, 1597 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1598 if (!device) 1599 goto err_device; 1600 1601 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1602 if (!vq) 1603 goto err_vq; 1604 1605 vq->vq.callback = callback; 1606 vq->vq.vdev = vdev; 1607 vq->vq.name = name; 1608 vq->vq.num_free = num; 1609 vq->vq.index = index; 1610 vq->we_own_ring = true; 1611 vq->notify = notify; 1612 vq->weak_barriers = weak_barriers; 1613 vq->broken = false; 1614 vq->last_used_idx = 0; 1615 vq->num_added = 0; 1616 vq->packed_ring = true; 1617 vq->use_dma_api = vring_use_dma_api(vdev); 1618 list_add_tail(&vq->vq.list, &vdev->vqs); 1619 #ifdef DEBUG 1620 vq->in_use = false; 1621 vq->last_add_time_valid = false; 1622 #endif 1623 1624 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1625 !context; 1626 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1627 1628 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1629 vq->weak_barriers = false; 1630 1631 vq->packed.ring_dma_addr = ring_dma_addr; 1632 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1633 vq->packed.device_event_dma_addr = device_event_dma_addr; 1634 1635 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1636 vq->packed.event_size_in_bytes = event_size_in_bytes; 1637 1638 vq->packed.vring.num = num; 1639 vq->packed.vring.desc = ring; 1640 vq->packed.vring.driver = driver; 1641 vq->packed.vring.device = device; 1642 1643 vq->packed.next_avail_idx = 0; 1644 vq->packed.avail_wrap_counter = 1; 1645 vq->packed.used_wrap_counter = 1; 1646 vq->packed.event_flags_shadow = 0; 1647 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1648 1649 vq->packed.desc_state = kmalloc_array(num, 1650 sizeof(struct vring_desc_state_packed), 1651 GFP_KERNEL); 1652 if (!vq->packed.desc_state) 1653 goto err_desc_state; 1654 1655 memset(vq->packed.desc_state, 0, 1656 num * sizeof(struct vring_desc_state_packed)); 1657 1658 /* Put everything in free lists. 
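	 * desc_state[i].next threads a free list of buffer ids through the
	 * id space: ids are taken from free_head in virtqueue_add_packed()
	 * and given back in detach_buf_packed().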
*/ 1659 vq->free_head = 0; 1660 for (i = 0; i < num-1; i++) 1661 vq->packed.desc_state[i].next = i + 1; 1662 1663 vq->packed.desc_extra = kmalloc_array(num, 1664 sizeof(struct vring_desc_extra_packed), 1665 GFP_KERNEL); 1666 if (!vq->packed.desc_extra) 1667 goto err_desc_extra; 1668 1669 memset(vq->packed.desc_extra, 0, 1670 num * sizeof(struct vring_desc_extra_packed)); 1671 1672 /* No callback? Tell other side not to bother us. */ 1673 if (!callback) { 1674 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1675 vq->packed.vring.driver->flags = 1676 cpu_to_le16(vq->packed.event_flags_shadow); 1677 } 1678 1679 return &vq->vq; 1680 1681 err_desc_extra: 1682 kfree(vq->packed.desc_state); 1683 err_desc_state: 1684 kfree(vq); 1685 err_vq: 1686 vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); 1687 err_device: 1688 vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); 1689 err_driver: 1690 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1691 err_ring: 1692 return NULL; 1693 } 1694 1695 1696 /* 1697 * Generic functions and exported symbols. 1698 */ 1699 1700 static inline int virtqueue_add(struct virtqueue *_vq, 1701 struct scatterlist *sgs[], 1702 unsigned int total_sg, 1703 unsigned int out_sgs, 1704 unsigned int in_sgs, 1705 void *data, 1706 void *ctx, 1707 gfp_t gfp) 1708 { 1709 struct vring_virtqueue *vq = to_vvq(_vq); 1710 1711 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1712 out_sgs, in_sgs, data, ctx, gfp) : 1713 virtqueue_add_split(_vq, sgs, total_sg, 1714 out_sgs, in_sgs, data, ctx, gfp); 1715 } 1716 1717 /** 1718 * virtqueue_add_sgs - expose buffers to other end 1719 * @vq: the struct virtqueue we're talking about. 1720 * @sgs: array of terminated scatterlists. 1721 * @out_num: the number of scatterlists readable by other side 1722 * @in_num: the number of scatterlists which are writable (after readable ones) 1723 * @data: the token identifying the buffer. 1724 * @gfp: how to do memory allocations (if necessary). 1725 * 1726 * Caller must ensure we don't call this with other virtqueue operations 1727 * at the same time (except where noted). 1728 * 1729 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1730 */ 1731 int virtqueue_add_sgs(struct virtqueue *_vq, 1732 struct scatterlist *sgs[], 1733 unsigned int out_sgs, 1734 unsigned int in_sgs, 1735 void *data, 1736 gfp_t gfp) 1737 { 1738 unsigned int i, total_sg = 0; 1739 1740 /* Count them first. */ 1741 for (i = 0; i < out_sgs + in_sgs; i++) { 1742 struct scatterlist *sg; 1743 1744 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1745 total_sg++; 1746 } 1747 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1748 data, NULL, gfp); 1749 } 1750 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1751 1752 /** 1753 * virtqueue_add_outbuf - expose output buffers to other end 1754 * @vq: the struct virtqueue we're talking about. 1755 * @sg: scatterlist (must be well-formed and terminated!) 1756 * @num: the number of entries in @sg readable by other side 1757 * @data: the token identifying the buffer. 1758 * @gfp: how to do memory allocations (if necessary). 1759 * 1760 * Caller must ensure we don't call this with other virtqueue operations 1761 * at the same time (except where noted). 1762 * 1763 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
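 *
 * For requests that mix driver-readable and device-writable parts, use
 * virtqueue_add_sgs() instead, e.g. (sketch; 'req' is a hypothetical
 * request with an outgoing header and an incoming status byte):
 *
 *	struct scatterlist hdr, status, *sgs[] = { &hdr, &status };
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);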
1764 */ 1765 int virtqueue_add_outbuf(struct virtqueue *vq, 1766 struct scatterlist *sg, unsigned int num, 1767 void *data, 1768 gfp_t gfp) 1769 { 1770 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1771 } 1772 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1773 1774 /** 1775 * virtqueue_add_inbuf - expose input buffers to other end 1776 * @vq: the struct virtqueue we're talking about. 1777 * @sg: scatterlist (must be well-formed and terminated!) 1778 * @num: the number of entries in @sg writable by other side 1779 * @data: the token identifying the buffer. 1780 * @gfp: how to do memory allocations (if necessary). 1781 * 1782 * Caller must ensure we don't call this with other virtqueue operations 1783 * at the same time (except where noted). 1784 * 1785 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1786 */ 1787 int virtqueue_add_inbuf(struct virtqueue *vq, 1788 struct scatterlist *sg, unsigned int num, 1789 void *data, 1790 gfp_t gfp) 1791 { 1792 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1793 } 1794 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1795 1796 /** 1797 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1798 * @vq: the struct virtqueue we're talking about. 1799 * @sg: scatterlist (must be well-formed and terminated!) 1800 * @num: the number of entries in @sg writable by other side 1801 * @data: the token identifying the buffer. 1802 * @ctx: extra context for the token 1803 * @gfp: how to do memory allocations (if necessary). 1804 * 1805 * Caller must ensure we don't call this with other virtqueue operations 1806 * at the same time (except where noted). 1807 * 1808 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1809 */ 1810 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1811 struct scatterlist *sg, unsigned int num, 1812 void *data, 1813 void *ctx, 1814 gfp_t gfp) 1815 { 1816 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1817 } 1818 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1819 1820 /** 1821 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1822 * @vq: the struct virtqueue 1823 * 1824 * Instead of virtqueue_kick(), you can do: 1825 * if (virtqueue_kick_prepare(vq)) 1826 * virtqueue_notify(vq); 1827 * 1828 * This is sometimes useful because the virtqueue_kick_prepare() needs 1829 * to be serialized, but the actual virtqueue_notify() call does not. 1830 */ 1831 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1832 { 1833 struct vring_virtqueue *vq = to_vvq(_vq); 1834 1835 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1836 virtqueue_kick_prepare_split(_vq); 1837 } 1838 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1839 1840 /** 1841 * virtqueue_notify - second half of split virtqueue_kick call. 1842 * @vq: the struct virtqueue 1843 * 1844 * This does not need to be serialized. 1845 * 1846 * Returns false if host notify failed or queue is broken, otherwise true. 1847 */ 1848 bool virtqueue_notify(struct virtqueue *_vq) 1849 { 1850 struct vring_virtqueue *vq = to_vvq(_vq); 1851 1852 if (unlikely(vq->broken)) 1853 return false; 1854 1855 /* Prod other side to tell it about changes. */ 1856 if (!vq->notify(_vq)) { 1857 vq->broken = true; 1858 return false; 1859 } 1860 return true; 1861 } 1862 EXPORT_SYMBOL_GPL(virtqueue_notify); 1863 1864 /** 1865 * virtqueue_kick - update after add_buf 1866 * @vq: the struct virtqueue 1867 * 1868 * After one or more virtqueue_add_* calls, invoke this to kick 1869 * the other side. 
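 *
 * When buffers are added under a driver lock, the prepare/notify split
 * lets the (possibly slow) notification run outside that lock, e.g.
 * (sketch; lock, sg, buf and kick belong to the driver):
 *
 *	spin_lock_irqsave(&lock, flags);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *	kick = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&lock, flags);
 *	if (kick)
 *		virtqueue_notify(vq);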
1870 * 1871 * Caller must ensure we don't call this with other virtqueue 1872 * operations at the same time (except where noted). 1873 * 1874 * Returns false if kick failed, otherwise true. 1875 */ 1876 bool virtqueue_kick(struct virtqueue *vq) 1877 { 1878 if (virtqueue_kick_prepare(vq)) 1879 return virtqueue_notify(vq); 1880 return true; 1881 } 1882 EXPORT_SYMBOL_GPL(virtqueue_kick); 1883 1884 /** 1885 * virtqueue_get_buf - get the next used buffer 1886 * @vq: the struct virtqueue we're talking about. 1887 * @len: the length written into the buffer 1888 * 1889 * If the device wrote data into the buffer, @len will be set to the 1890 * amount written. This means you don't need to clear the buffer 1891 * beforehand to ensure there's no data leakage in the case of short 1892 * writes. 1893 * 1894 * Caller must ensure we don't call this with other virtqueue 1895 * operations at the same time (except where noted). 1896 * 1897 * Returns NULL if there are no used buffers, or the "data" token 1898 * handed to virtqueue_add_*(). 1899 */ 1900 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1901 void **ctx) 1902 { 1903 struct vring_virtqueue *vq = to_vvq(_vq); 1904 1905 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1906 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1907 } 1908 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1909 1910 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1911 { 1912 return virtqueue_get_buf_ctx(_vq, len, NULL); 1913 } 1914 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1915 /** 1916 * virtqueue_disable_cb - disable callbacks 1917 * @vq: the struct virtqueue we're talking about. 1918 * 1919 * Note that this is not necessarily synchronous, hence unreliable and only 1920 * useful as an optimization. 1921 * 1922 * Unlike other operations, this need not be serialized. 1923 */ 1924 void virtqueue_disable_cb(struct virtqueue *_vq) 1925 { 1926 struct vring_virtqueue *vq = to_vvq(_vq); 1927 1928 if (vq->packed_ring) 1929 virtqueue_disable_cb_packed(_vq); 1930 else 1931 virtqueue_disable_cb_split(_vq); 1932 } 1933 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1934 1935 /** 1936 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1937 * @vq: the struct virtqueue we're talking about. 1938 * 1939 * This re-enables callbacks; it returns current queue state 1940 * in an opaque unsigned value. This value should be later tested by 1941 * virtqueue_poll, to detect a possible race between the driver checking for 1942 * more work, and enabling callbacks. 1943 * 1944 * Caller must ensure we don't call this with other virtqueue 1945 * operations at the same time (except where noted). 1946 */ 1947 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1948 { 1949 struct vring_virtqueue *vq = to_vvq(_vq); 1950 1951 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1952 virtqueue_enable_cb_prepare_split(_vq); 1953 } 1954 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1955 1956 /** 1957 * virtqueue_poll - query pending used buffers 1958 * @vq: the struct virtqueue we're talking about. 1959 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1960 * 1961 * Returns "true" if there are pending used buffers in the queue. 1962 * 1963 * This does not need to be serialized. 1964 */ 1965 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1966 { 1967 struct vring_virtqueue *vq = to_vvq(_vq); 1968 1969 virtio_mb(vq->weak_barriers); 1970 return vq->packed_ring ? 
/**
 * virtqueue_poll - query pending used buffers
 * @vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	virtio_mb(vq->weak_barriers);
	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
				 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

	return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
				 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
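/*
 * A minimal shutdown sketch (compiled out): once the device has been
 * reset and the queue is no longer active, buffers that were added but
 * never used can be reclaimed through their tokens.  example_free() is
 * a hypothetical helper.
 */
#if 0
static void example_free(void *buf);

static void example_remove(struct virtqueue *vq)
{
	void *buf;

	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
		example_free(buf);
}
#endif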
static inline bool more_used(const struct vring_virtqueue *vq)
{
	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
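/*
 * A rough sketch (compiled out) of how a transport might wire
 * vring_interrupt() to a per-virtqueue interrupt line; request_irq()
 * comes from <linux/interrupt.h>, and the irq number and name here are
 * assumptions for illustration.
 */
#if 0
static int example_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
	/* vring_interrupt() expects the struct virtqueue as dev_id. */
	return request_irq(irq, vring_interrupt, 0, "example-vq", vq);
}
#endif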
/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
					struct vring vring,
					struct virtio_device *vdev,
					bool weak_barriers,
					bool context,
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *),
					const char *name)
{
	unsigned int i;
	struct vring_virtqueue *vq;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->packed_ring = false;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = vring.num;
	vq->vq.index = index;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->num_added = 0;
	vq->use_dma_api = vring_use_dma_api(vdev);
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->split.queue_dma_addr = 0;
	vq->split.queue_size_in_bytes = 0;

	vq->split.vring = vring;
	vq->split.avail_flags_shadow = 0;
	vq->split.avail_idx_shadow = 0;

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
					vq->split.avail_flags_shadow);
	}

	vq->split.desc_state = kmalloc_array(vring.num,
			sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!vq->split.desc_state) {
		kfree(vq);
		return NULL;
	}

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < vring.num - 1; i++)
		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
	memset(vq->split.desc_state, 0, vring.num *
			sizeof(struct vring_desc_state_split));

	return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return vring_create_virtqueue_packed(index, num, vring_align,
				vdev, weak_barriers, may_reduce_num,
				context, notify, callback, name);

	return vring_create_virtqueue_split(index, num, vring_align,
			vdev, weak_barriers, may_reduce_num,
			context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      bool context,
				      void *pages,
				      bool (*notify)(struct virtqueue *vq),
				      void (*callback)(struct virtqueue *vq),
				      const char *name)
{
	struct vring vring;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vring_init(&vring, num, pages, vring_align);
	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->we_own_ring) {
		if (vq->packed_ring) {
			vring_free_queue(vq->vq.vdev,
					 vq->packed.ring_size_in_bytes,
					 vq->packed.vring.desc,
					 vq->packed.ring_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.driver,
					 vq->packed.driver_event_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.device,
					 vq->packed.device_event_dma_addr);

			kfree(vq->packed.desc_state);
			kfree(vq->packed.desc_extra);
		} else {
			vring_free_queue(vq->vq.vdev,
					 vq->split.queue_size_in_bytes,
					 vq->split.vring.desc,
					 vq->split.queue_dma_addr);

			kfree(vq->split.desc_state);
		}
	}
	list_del(&_vq->list);
	kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
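/*
 * A condensed sketch (compiled out) of the create/delete pairing a
 * transport goes through.  example_notify(), the queue size and the
 * name are placeholders; real transports also program the ring
 * addresses into the device (see the address helpers below).
 */
#if 0
static bool example_notify(struct virtqueue *vq)
{
	/* Poke a doorbell register here; return false on failure. */
	return true;
}

static struct virtqueue *example_setup_vq(struct virtio_device *vdev,
					  unsigned int index,
					  void (*callback)(struct virtqueue *))
{
	/* 128 entries, page-aligned ring, may_reduce_num, no per-buffer ctx. */
	return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
				      true, true, false,
				      example_notify, callback, "example");
}

static void example_del_vq(struct virtqueue *vq)
{
	vring_del_virtqueue(vq);
}
#endif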
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int i;

	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
		switch (i) {
		case VIRTIO_RING_F_INDIRECT_DESC:
			break;
		case VIRTIO_RING_F_EVENT_IDX:
			break;
		case VIRTIO_F_VERSION_1:
			break;
		case VIRTIO_F_IOMMU_PLATFORM:
			break;
		case VIRTIO_F_RING_PACKED:
			break;
		case VIRTIO_F_ORDER_PLATFORM:
			break;
		default:
			/* We don't understand this bit. */
			__virtio_clear_bit(vdev, i);
		}
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring.  This is mainly used for boasting to
 * userspace.  Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover.  You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		vq->broken = true;
	}
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
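
/*
 * A sketch (compiled out) of how a transport could use the address
 * helpers above to tell the device where a ring it allocated lives.
 * example_write_queue_addr() and the VIRTQ_* selectors are hypothetical
 * placeholders for transport-specific register writes.
 */
#if 0
static void example_write_queue_addr(struct virtio_device *vdev,
				     int which, dma_addr_t addr);

static void example_program_addrs(struct virtio_device *vdev,
				  struct virtqueue *vq)
{
	example_write_queue_addr(vdev, VIRTQ_DESC, virtqueue_get_desc_addr(vq));
	example_write_queue_addr(vdev, VIRTQ_AVAIL, virtqueue_get_avail_addr(vq));
	example_write_queue_addr(vdev, VIRTQ_USED, virtqueue_get_used_addr(vq));
}
#endif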