/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Implements the virtqueue interface as basically described
 * in the original VirtIO paper.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/cpu.h>
#include <machine/bus.h>
#include <machine/atomic.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/virtio_ring.h>

#include "virtio_bus_if.h"

struct virtqueue {
	device_t		 vq_dev;
	struct mtx		 vq_ring_mtx;
	struct mtx		 vq_indirect_mtx;
	uint16_t		 vq_queue_index;
	uint16_t		 vq_nentries;
	uint32_t		 vq_flags;
#define	VIRTQUEUE_FLAG_MODERN	 0x0001
#define	VIRTQUEUE_FLAG_INDIRECT	 0x0002
#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0004

	int			 vq_max_indirect_size;
	bus_size_t		 vq_notify_offset;
	virtqueue_intr_t	*vq_intrhand;
	void			*vq_intrhand_arg;

	struct vring		 vq_ring;
	uint16_t		 vq_free_cnt;
	uint16_t		 vq_queued_cnt;
	/*
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t		 vq_desc_head_idx;
	/*
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t		 vq_used_cons_idx;

	void			*vq_ring_mem;
	bus_dmamap_t		 vq_ring_mapp;
	vm_paddr_t		 vq_ring_paddr;

	int			 vq_indirect_mem_size;
	int			 vq_alignment;
	int			 vq_ring_size;
	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];

	bus_dma_tag_t		 vq_ring_dmat;
	bus_dma_tag_t		 vq_indirect_dmat;

	struct vq_desc_extra {
		void		  *cookie;
		struct vring_desc *indirect;
		vm_paddr_t	   indirect_paddr;
		bus_dmamap_t	   mapp;
		uint16_t	   ndescs;
	} vq_descx[0];
};
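
/*
 * For orientation, a rough sketch of how a driver typically drives this
 * interface (illustrative only: "sc", "foo_vq_intr", the enqueue arguments,
 * and the softc layout are hypothetical, and error handling is omitted).
 * The driver describes each virtqueue with a vq_alloc_info, has the bus
 * front-end allocate it, and then loops enqueuing buffers and notifying
 * the host:
 *
 *	struct vq_alloc_info vq_info;
 *
 *	VQ_ALLOC_INFO_INIT(&vq_info, 0, foo_vq_intr, sc, &sc->vq,
 *	    "%s request", device_get_nameunit(dev));
 *	...
 *	error = virtqueue_enqueue(sc->vq, cookie, sg, readable, writable);
 *	if (error == 0)
 *		virtqueue_notify(sc->vq);
 *	...
 *	cookie = virtqueue_dequeue(sc->vq, &len);
 */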

/*
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define	VQ_RING_DESC_CHAIN_END	32768

#define	VQASSERT(_vq, _exp, _msg, ...)				\
    KASSERT((_exp), ("%s: %s - "_msg, __func__, (_vq)->vq_name,	\
	##__VA_ARGS__))

#define	VQ_RING_ASSERT_VALID_IDX(_vq, _idx)			\
    VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,		\
	"invalid ring index: %d, max: %d", (_idx),		\
	(_vq)->vq_nentries)

#define	VQ_RING_ASSERT_CHAIN_TERM(_vq)				\
    VQASSERT((_vq), (_vq)->vq_desc_head_idx ==			\
	VQ_RING_DESC_CHAIN_END, "full ring terminated "		\
	"incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)

static int	virtqueue_init_indirect(struct virtqueue *vq, int);
static void	virtqueue_free_indirect(struct virtqueue *vq);
static void	virtqueue_init_indirect_list(struct virtqueue *,
		    struct vring_desc *);

static void	vq_ring_init(struct virtqueue *);
static void	vq_ring_update_avail(struct virtqueue *, uint16_t);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
		    struct vring_desc *, uint16_t, struct sglist *, int, int);
static bool	vq_ring_use_indirect(struct virtqueue *, int);
static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
		    struct sglist *, int, int);
static int	vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
static int	vq_ring_must_notify_host(struct virtqueue *);
static void	vq_ring_notify_host(struct virtqueue *);
static void	vq_ring_free_chain(struct virtqueue *, uint16_t);

SDT_PROVIDER_DEFINE(virtqueue);
SDT_PROBE_DEFINE6(virtqueue, , enqueue_segments, entry, "struct virtqueue *",
    "struct vring_desc *", "uint16_t", "struct sglist *", "int", "int");
SDT_PROBE_DEFINE1(virtqueue, , enqueue_segments, return, "uint16_t");

#define	vq_modern(_vq)		(((_vq)->vq_flags & VIRTQUEUE_FLAG_MODERN) != 0)
#define	vq_htog16(_vq, _val)	virtio_htog16(vq_modern(_vq), _val)
#define	vq_htog32(_vq, _val)	virtio_htog32(vq_modern(_vq), _val)
#define	vq_htog64(_vq, _val)	virtio_htog64(vq_modern(_vq), _val)
#define	vq_gtoh16(_vq, _val)	virtio_gtoh16(vq_modern(_vq), _val)
#define	vq_gtoh32(_vq, _val)	virtio_gtoh32(vq_modern(_vq), _val)
#define	vq_gtoh64(_vq, _val)	virtio_gtoh64(vq_modern(_vq), _val)
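
/*
 * To illustrate the rule these wrappers implement: a modern
 * (VIRTIO_F_VERSION_1) device always uses little-endian ring fields,
 * while a legacy device uses the guest's native byte order. So on a
 * big-endian guest, vq_gtoh16(vq, x) byte-swaps for a modern queue and
 * is a no-op for a legacy one; on a little-endian guest both cases are
 * no-ops. (This describes the virtio_htog*()/virtio_gtoh*() helpers as
 * used here, not anything additional done in this file.)
 */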

static void
virtqueue_ring_load_callback(void *arg, bus_dma_segment_t *segs,
    int nsegs, int error)
{
	struct virtqueue *vq;

	if (error != 0)
		return;

	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));

	vq = (struct virtqueue *)arg;
	vq->vq_ring_paddr = segs[0].ds_addr;
}

int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
    bus_size_t notify_offset, int align, vm_paddr_t highaddr,
    struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
		device_printf(dev, "virtqueue %d (%s) requested too many "
		    "indirect descriptors: %d, max %d\n",
		    queue, info->vqai_name, info->vqai_maxindirsz,
		    VIRTIO_MAX_INDIRECT);
		return (EINVAL);
	}

	vq = malloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_notify_offset = notify_offset;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_MODERN;
	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	vq->vq_ring_size = round_page(vring_size(size, align));

	mtx_init(&vq->vq_ring_mtx, device_get_nameunit(dev),
	    "VirtIO Queue Lock", MTX_DEF);

	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* parent */
	    align,			/* alignment */
	    0,				/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    vq->vq_ring_size,		/* max request size */
	    1,				/* max # segments */
	    vq->vq_ring_size,		/* maxsegsize */
	    BUS_DMA_COHERENT,		/* flags */
	    busdma_lock_mutex,		/* lockfunc */
	    &vq->vq_ring_mtx,		/* lockarg */
	    &vq->vq_ring_dmat);
	if (error) {
		device_printf(dev, "cannot create bus_dma_tag\n");
		goto fail;
	}

#ifdef __powerpc__
	/*
	 * Virtio uses physical addresses rather than bus addresses, so we
	 * need to ask busdma to skip the iommu physical->bus mapping. At
	 * present, this is only a thing on the powerpc architectures.
	 */
	bus_dma_tag_set_iommu(vq->vq_ring_dmat, NULL, NULL);
#endif

	if (info->vqai_maxindirsz > 1) {
		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
		if (error)
			goto fail;
	}

	error = bus_dmamem_alloc(vq->vq_ring_dmat, &vq->vq_ring_mem,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
	    &vq->vq_ring_mapp);
	if (error) {
		device_printf(dev, "bus_dmamem_alloc failed\n");
		goto fail;
	}

	error = bus_dmamap_load(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    vq->vq_ring_mem, vq->vq_ring_size, virtqueue_ring_load_callback,
	    vq, BUS_DMA_NOWAIT);
	if (error) {
		device_printf(dev, "vq->vq_ring_mapp load failed\n");
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	if (error)
		virtqueue_free(vq);

	return (error);
}

static void
virtqueue_indirect_load_callback(void *arg, bus_dma_segment_t *segs,
    int nsegs, int error)
{
	struct vq_desc_extra *dxp;

	if (error != 0)
		return;

	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));

	dxp = (struct vq_desc_extra *)arg;
	dxp->indirect_paddr = segs[0].ds_addr;
}
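
/*
 * Indirect descriptor support: when VIRTIO_RING_F_INDIRECT_DESC is
 * negotiated, a multi-segment request can consume just one slot in the
 * main descriptor table. That slot is flagged VRING_DESC_F_INDIRECT and
 * points at a separate table of up to vq_max_indirect_size descriptors
 * holding the actual segments (see vq_ring_enqueue_indirect()). One such
 * table is preallocated below for every ring entry so that enqueue never
 * has to allocate memory.
 */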
static int
virtqueue_init_indirect(struct virtqueue *vq, int indirect_size)
{
	device_t dev;
	struct vq_desc_extra *dxp;
	int i, size;
	int error;
	int align;

	dev = vq->vq_dev;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
		/*
		 * Indirect descriptors were requested by the driver but
		 * not negotiated. Return zero to keep the initialization
		 * going: we'll run fine without.
		 */
		if (bootverbose)
			device_printf(dev, "virtqueue %d (%s) requested "
			    "indirect descriptors but not negotiated\n",
			    vq->vq_queue_index, vq->vq_name);
		return (0);
	}

	size = indirect_size * sizeof(struct vring_desc);
	vq->vq_max_indirect_size = indirect_size;
	vq->vq_indirect_mem_size = size;
	vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT;

	mtx_init(&vq->vq_indirect_mtx, device_get_nameunit(dev),
	    "VirtIO Indirect Queue Lock", MTX_DEF);

	align = size;
	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* parent */
	    align,			/* alignment */
	    0,				/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* max request size */
	    1,				/* max # segments */
	    size,			/* maxsegsize */
	    BUS_DMA_COHERENT,		/* flags */
	    busdma_lock_mutex,		/* lockfunc */
	    &vq->vq_indirect_mtx,	/* lockarg */
	    &vq->vq_indirect_dmat);
	if (error) {
		device_printf(dev, "cannot create indirect bus_dma_tag\n");
		return (error);
	}

#ifdef __powerpc__
	/*
	 * Virtio uses physical addresses rather than bus addresses, so we
	 * need to ask busdma to skip the iommu physical->bus mapping. At
	 * present, this is only a thing on the powerpc architectures.
	 */
	bus_dma_tag_set_iommu(vq->vq_indirect_dmat, NULL, NULL);
#endif

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		error = bus_dmamem_alloc(vq->vq_indirect_dmat,
		    (void **)&dxp->indirect,
		    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
		    &dxp->mapp);
		if (error) {
			/*
			 * These failures are recoverable: report them and
			 * let the caller unwind rather than panicking.
			 */
			device_printf(dev,
			    "cannot allocate indirect list\n");
			return (error);
		}

		error = bus_dmamap_load(vq->vq_indirect_dmat, dxp->mapp,
		    dxp->indirect, size, virtqueue_indirect_load_callback,
		    dxp, BUS_DMA_NOWAIT);
		if (error) {
			device_printf(dev, "error loading indirect list\n");
			bus_dmamem_free(vq->vq_indirect_dmat, dxp->indirect,
			    dxp->mapp);
			dxp->indirect = NULL;
			return (error);
		}

		virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	return (0);
}

static void
virtqueue_free_indirect(struct virtqueue *vq)
{
	struct vq_desc_extra *dxp;
	int i;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		if (dxp->indirect == NULL)
			break;

		bus_dmamap_unload(vq->vq_indirect_dmat, dxp->mapp);
		bus_dmamem_free(vq->vq_indirect_dmat, dxp->indirect,
		    dxp->mapp);
		dxp->indirect = NULL;
		dxp->indirect_paddr = 0;
	}

	vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT;
	vq->vq_indirect_mem_size = 0;
}

static void
virtqueue_init_indirect_list(struct virtqueue *vq,
    struct vring_desc *indirect)
{
	int i;

	bzero(indirect, vq->vq_indirect_mem_size);

	for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
		indirect[i].next = vq_gtoh16(vq, i + 1);
	indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
}
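
/*
 * virtqueue_reinit() restores a virtqueue to its freshly-initialized
 * state while keeping the ring memory allocation. It is intended for use
 * after the device has been reset (for example, across suspend/resume);
 * the driver is expected to have reclaimed any outstanding cookies first,
 * typically via repeated virtqueue_drain() calls, or the warning below
 * will fire.
 */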
int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
		if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
			virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

void
virtqueue_free(struct virtqueue *vq)
{

	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
		virtqueue_free_indirect(vq);

	if (vq->vq_ring_mem != NULL) {
		bus_dmamap_unload(vq->vq_ring_dmat, vq->vq_ring_mapp);
		bus_dmamem_free(vq->vq_ring_dmat, vq->vq_ring_mem,
		    vq->vq_ring_mapp);
		vq->vq_ring_size = 0;
	}

	if (vq->vq_ring_dmat != NULL)
		bus_dma_tag_destroy(vq->vq_ring_dmat);

	free(vq, M_DEVBUF);
}

vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{

	return (vq->vq_ring_paddr);
}

vm_paddr_t
virtqueue_desc_paddr(struct virtqueue *vq)
{

	return (vq->vq_ring.desc_paddr);
}

vm_paddr_t
virtqueue_avail_paddr(struct virtqueue *vq)
{

	return (vq->vq_ring.avail_paddr);
}

vm_paddr_t
virtqueue_used_paddr(struct virtqueue *vq)
{

	return (vq->vq_ring.used_paddr);
}

uint16_t
virtqueue_index(struct virtqueue *vq)
{

	return (vq->vq_queue_index);
}

int
virtqueue_size(struct virtqueue *vq)
{

	return (vq->vq_nentries);
}

int
virtqueue_nfree(struct virtqueue *vq)
{

	return (vq->vq_free_cnt);
}

bool
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}

bool
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}
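
/*
 * Notification and interrupt suppression are symmetric hints: the host
 * sets VRING_USED_F_NO_NOTIFY (or, with EVENT_IDX, publishes an avail
 * event index) when it does not want to be kicked, and the guest sets
 * VRING_AVAIL_F_NO_INTERRUPT (or a used event index) when it does not
 * want to be interrupted. vq_ring_must_notify_host() below consults the
 * host's half of this before virtqueue_notify() rings the doorbell.
 */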
void
virtqueue_notify(struct virtqueue *vq)
{

	/* Ensure the updated avail->idx is visible to the host. */
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	if (vq_ring_must_notify_host(vq))
		vq_ring_notify_host(vq);
	vq->vq_queued_cnt = 0;
}

int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	used_idx = vq_htog16(vq, vq->vq_ring.used->idx);

	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

int
virtqueue_intr_filter(struct virtqueue *vq)
{

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (0);

	virtqueue_disable_intr(vq);

	return (1);
}

void
virtqueue_intr(struct virtqueue *vq)
{

	vq->vq_intrhand(vq->vq_intrhand_arg);
}

int
virtqueue_enable_intr(struct virtqueue *vq)
{

	return (vq_ring_enable_interrupt(vq, 0));
}

int
virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint)
{
	uint16_t ndesc, avail_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx);

	switch (hint) {
	case VQ_POSTPONE_SHORT:
		ndesc = ndesc / 4;
		break;
	case VQ_POSTPONE_LONG:
		ndesc = (ndesc * 3) / 4;
		break;
	case VQ_POSTPONE_EMPTIED:
		break;
	}

	return (vq_ring_enable_interrupt(vq, ndesc));
}

/*
 * Note this is only considered a hint to the host.
 */
void
virtqueue_disable_intr(struct virtqueue *vq)
{

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) = vq_gtoh16(vq,
		    vq->vq_used_cons_idx - vq->vq_nentries - 1);
		return;
	}

	vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}
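
/*
 * A minimal enqueue sketch (illustrative only: "req", "resp", and the
 * two-segment layout are hypothetical). Device-readable segments must be
 * appended to the sglist first, followed by the device-writable ones,
 * and the readable/writable counts passed to virtqueue_enqueue() must
 * match that layout:
 *
 *	struct sglist_seg segs[2];
 *	struct sglist sg;
 *	int error;
 *
 *	sglist_init(&sg, nitems(segs), segs);
 *	error = sglist_append(&sg, req, sizeof(*req));    - device-readable
 *	error = sglist_append(&sg, resp, sizeof(*resp));  - device-writable
 *	error = virtqueue_enqueue(vq, req, &sg, 1, 1);
 */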
int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);
	VQASSERT(vq,
	    needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
	    "too many segments to enqueue: %d, %d/%d", needed,
	    vq->vq_nentries, vq->vq_max_indirect_size);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);

	if (vq_ring_use_indirect(vq, needed)) {
		vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
		return (0);
	} else if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	vq_ring_update_avail(vq, head_idx);

	return (0);
}

void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_used_cons_idx ==
	    vq_htog16(vq, atomic_load_16(&vq->vq_ring.used->idx)))
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	rmb();
	desc_idx = (uint16_t) vq_htog32(vq, uep->id);
	if (len != NULL)
		*len = vq_htog32(vq, uep->len);

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	while ((cookie = virtqueue_dequeue(vq, len)) == NULL)
		cpu_spinwait();

	return (cookie);
}

void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep the free count consistent. */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; "
	    "used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt,
	    virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx,
	    vq_htog16(vq, vq->vq_ring.used->idx),
	    vq_htog16(vq, vring_used_event(&vq->vq_ring)),
	    vq_htog16(vq, vq->vq_ring.avail->flags),
	    vq_htog16(vq, vq->vq_ring.used->flags));
}

static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_ring_paddr, vq->vq_alignment);

	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = vq_gtoh16(vq, i + 1);
	vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}
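
/*
 * The avail and used index fields are free-running 16-bit counters that
 * are never masked when stored; only array accesses are reduced modulo
 * the ring size. Because the ring size is a power of two (enforced in
 * virtqueue_alloc()), the slot for a given index is simply:
 *
 *	ring_idx = idx & (vq->vq_nentries - 1);
 *
 * which is the masking used by vq_ring_update_avail() below and by
 * virtqueue_dequeue() above.
 */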
static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx, avail_ring_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the
	 * new descriptor.
	 */
	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	avail_ring_idx = avail_idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx);

	wmb();
	vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1);

	/* Keep pending count until virtqueue_notify(). */
	vq->vq_queued_cnt++;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}

static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	SDT_PROBE6(virtqueue, , enqueue_segments, entry, vq, desc, head_idx,
	    sg, readable, writable);

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	     i < needed;
	     i++, idx = vq_htog16(vq, dp->next), seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = vq_gtoh64(vq, seg->ss_paddr);
		dp->len = vq_gtoh32(vq, seg->ss_len);
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT);
		if (i >= readable)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE);
	}

	SDT_PROBE1(virtqueue, , enqueue_segments, return, idx);
	return (idx);
}

static bool
vq_ring_use_indirect(struct virtqueue *vq, int needed)
{

	if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
		return (false);

	if (vq->vq_max_indirect_size < needed)
		return (false);

	if (needed < 2)
		return (false);

	return (true);
}

static void
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
    struct sglist *sg, int readable, int writable)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx;

	needed = readable + writable;
	VQASSERT(vq, needed <= vq->vq_max_indirect_size,
	    "enqueuing too many indirect descriptors");

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dp = &vq->vq_ring.desc[head_idx];
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = 1;

	dp->addr = vq_gtoh64(vq, dxp->indirect_paddr);
	dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc));
	dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT);

	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
	    sg, readable, writable);

	bus_dmamap_sync(vq->vq_indirect_dmat, dxp->mapp,
	    BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	vq->vq_desc_head_idx = vq_htog16(vq, dp->next);
	vq->vq_free_cnt--;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);

	vq_ring_update_avail(vq, head_idx);
}
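
/*
 * With EVENT_IDX, "enabling" interrupts means publishing the used index
 * at which the guest next wants an interrupt; a non-zero ndesc pushes
 * that point past entries we are willing to let accumulate first. The
 * matching wrap-safe window test on free-running 16-bit indices is
 * vring_need_event() (from virtio_ring.h), which boils down to:
 *
 *	(uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx)
 *
 * and is what vq_ring_must_notify_host() relies on for the host-bound
 * direction.
 */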
static int
vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
{

	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed.
	 */
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) =
		    vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc);
	} else {
		vq->vq_ring.avail->flags &=
		    vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT);
	}

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	/*
	 * Enough items may have already been consumed to meet our threshold
	 * since we last checked. Let our caller know so it processes the new
	 * entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}

static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx, flags;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		new_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring));

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	flags = vq->vq_ring.used->flags;
	return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0);
}

static void
vq_ring_notify_host(struct virtqueue *vq)
{

	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index,
	    vq->vq_notify_offset);
}

static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	uint16_t next_idx;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) {
		while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) {
			next_idx = vq_htog16(vq, dp->next);
			VQ_RING_ASSERT_VALID_IDX(vq, next_idx);
			dp = &vq->vq_ring.desc[next_idx];
			dxp->ndescs--;
		}
	}

	VQASSERT(vq, dxp->ndescs == 0,
	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);

	/*
	 * We must append the existing free chain, if any, to the end of
	 * the newly freed chain. If the virtqueue was completely used,
	 * then head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx);
	vq->vq_desc_head_idx = desc_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}