1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Implements the virtqueue interface as basically described 31 * in the original VirtIO paper. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/cpu.h>
#include <machine/bus.h>
#include <machine/atomic.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/virtio_ring.h>

#include "virtio_bus_if.h"

/*
 * Driver-side state for one virtqueue.  The vq_descx[] bookkeeping
 * array is allocated inline, immediately after this structure (see
 * the malloc in virtqueue_alloc()).
 */
struct virtqueue {
	device_t	 vq_dev;		/* owning bus device */
	struct mtx	 vq_ring_mtx;		/* busdma lock for the ring tag */
	struct mtx	 vq_indirect_mtx;	/* busdma lock for indirect lists */
	uint16_t	 vq_queue_index;	/* queue index on the device */
	uint16_t	 vq_nentries;		/* ring size; always a power of 2 */
	uint32_t	 vq_flags;
#define	VIRTQUEUE_FLAG_MODERN	 0x0001		/* VIRTIO_F_VERSION_1 negotiated */
#define	VIRTQUEUE_FLAG_INDIRECT	 0x0002		/* indirect descriptors in use */
#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0004		/* VIRTIO_RING_F_EVENT_IDX negotiated */

	int		 vq_max_indirect_size;	/* max segments per indirect list */
	bus_size_t	 vq_notify_offset;	/* passed through to VIRTIO_BUS_NOTIFY_VQ */
	virtqueue_intr_t *vq_intrhand;		/* driver interrupt handler */
	void		*vq_intrhand_arg;

	struct vring	 vq_ring;		/* descriptor/avail/used rings */
	uint16_t	 vq_free_cnt;		/* # of unused descriptors */
	uint16_t	 vq_queued_cnt;		/* avail entries pending notify */
	/*
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t	 vq_desc_head_idx;
	/*
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t	 vq_used_cons_idx;

	void		*vq_ring_mem;		/* ring memory (KVA) */
	bus_dmamap_t	 vq_ring_mapp;
	vm_paddr_t	 vq_ring_paddr;		/* ring physical address */

	int		 vq_indirect_mem_size;	/* bytes per indirect list */
	int		 vq_alignment;
	int		 vq_ring_size;		/* round_page(vring_size(...)) */
	char		 vq_name[VIRTQUEUE_MAX_NAME_SZ];

	bus_dma_tag_t	 vq_ring_dmat;
	bus_dma_tag_t	 vq_indirect_dmat;

	/*
	 * Per-chain bookkeeping, indexed by the chain's head descriptor.
	 * Variable length; sized to vq_nentries at allocation time.
	 */
	struct vq_desc_extra {
		void		  *cookie;	/* driver request token */
		struct vring_desc *indirect;	/* indirect list (KVA), or NULL */
		vm_paddr_t	   indirect_paddr; /* indirect list physical addr */
		bus_dmamap_t	   mapp;
		uint16_t	   ndescs;	/* # descriptors in this chain */
	} vq_descx[0];
};

/*
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define	VQ_RING_DESC_CHAIN_END	32768

#define	VQASSERT(_vq, _exp, _msg, ...)				\
    KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name,	\
	##__VA_ARGS__))

#define	VQ_RING_ASSERT_VALID_IDX(_vq, _idx)			\
    VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,		\
	"invalid ring index: %d, max: %d", (_idx),		\
	(_vq)->vq_nentries)

#define	VQ_RING_ASSERT_CHAIN_TERM(_vq)				\
    VQASSERT((_vq), (_vq)->vq_desc_head_idx ==			\
	VQ_RING_DESC_CHAIN_END, "full ring terminated "		\
	"incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)

static int	virtqueue_init_indirect(struct virtqueue *vq, int);
static void	virtqueue_free_indirect(struct virtqueue *vq);
static void	virtqueue_init_indirect_list(struct virtqueue *,
		    struct vring_desc *);

static void	vq_ring_init(struct virtqueue *);
static void	vq_ring_update_avail(struct virtqueue *, uint16_t);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
		    struct vring_desc *, uint16_t, struct sglist *, int, int);
static bool	vq_ring_use_indirect(struct virtqueue *, int);
static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
		    struct sglist *, int, int);
static int	vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
static int	vq_ring_must_notify_host(struct virtqueue *);
static void	vq_ring_notify_host(struct virtqueue *);
static void	vq_ring_free_chain(struct virtqueue *, uint16_t);

SDT_PROVIDER_DEFINE(virtqueue);
SDT_PROBE_DEFINE6(virtqueue, , enqueue_segments, entry, "struct virtqueue *",
    "struct vring_desc *", "uint16_t", "struct sglist *", "int", "int");
SDT_PROBE_DEFINE1(virtqueue, , enqueue_segments, return, "uint16_t");

/*
 * Endian conversion helpers: legacy devices use guest endianness,
 * modern (VIRTIO_F_VERSION_1) devices are always little endian.
 */
#define	vq_modern(_vq)		(((_vq)->vq_flags & \
    VIRTQUEUE_FLAG_MODERN) != 0)
#define	vq_htog16(_vq, _val)	virtio_htog16(vq_modern(_vq), _val)
#define	vq_htog32(_vq, _val)	virtio_htog32(vq_modern(_vq), _val)
#define	vq_htog64(_vq, _val)	virtio_htog64(vq_modern(_vq), _val)
#define	vq_gtoh16(_vq, _val)	virtio_gtoh16(vq_modern(_vq), _val)
#define	vq_gtoh32(_vq, _val)	virtio_gtoh32(vq_modern(_vq), _val)
#define	vq_gtoh64(_vq, _val)	virtio_gtoh64(vq_modern(_vq), _val)

/*
 * busdma load callback for the ring: record the single physical
 * segment backing the ring so virtqueue_paddr() can report it.
 */
static void
virtqueue_ring_load_callback(void *arg, bus_dma_segment_t *segs,
    int nsegs, int error)
{
	struct virtqueue *vq;

	if (error != 0)
		return;

	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));

	vq = (struct virtqueue *)arg;
	vq->vq_ring_paddr = segs[0].ds_addr;
}

/*
 * Allocate and initialize virtqueue 'queue' of 'size' entries, returning
 * it in *vqp.  'size' must be a power of 2.  On error, any partially
 * constructed state is released via virtqueue_free() and *vqp is NULL.
 */
int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
    bus_size_t notify_offset, int align, vm_paddr_t highaddr,
    struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	/* Validate the requested geometry before allocating anything. */
	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
		device_printf(dev, "virtqueue %d (%s) requested too many "
		    "indirect descriptors: %d, max %d\n",
		    queue, info->vqai_name, info->vqai_maxindirsz,
		    VIRTIO_MAX_INDIRECT);
		return (EINVAL);
	}

	/* The vq_descx[] array is carried inline after the structure. */
	vq = malloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_notify_offset = notify_offset;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_MODERN;
	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	vq->vq_ring_size = round_page(vring_size(size, align));

	mtx_init(&vq->vq_ring_mtx, device_get_nameunit(dev),
	    "VirtIO Queue Lock", MTX_DEF);

	/* One contiguous, coherent segment holds all three rings. */
	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* parent */
	    align,			/* alignment */
	    0,				/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    vq->vq_ring_size,		/* max request size */
	    1,				/* max # segments */
	    vq->vq_ring_size,		/* maxsegsize */
	    BUS_DMA_COHERENT,		/* flags */
	    busdma_lock_mutex,		/* lockfunc */
	    &vq->vq_ring_mtx,		/* lockarg */
	    &vq->vq_ring_dmat);
	if (error) {
		device_printf(dev, "cannot create bus_dma_tag\n");
		goto fail;
	}

#ifdef __powerpc__
	/*
	 * Virtio uses physical addresses rather than bus addresses, so we
	 * need to ask busdma to skip the iommu physical->bus mapping. At
	 * present, this is only a thing on the powerpc architectures.
256 */ 257 bus_dma_tag_set_iommu(vq->vq_ring_dmat, NULL, NULL); 258 #endif 259 260 if (info->vqai_maxindirsz > 1) { 261 error = virtqueue_init_indirect(vq, info->vqai_maxindirsz); 262 if (error) 263 goto fail; 264 } 265 266 error = bus_dmamem_alloc(vq->vq_ring_dmat, &vq->vq_ring_mem, 267 BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, 268 &vq->vq_ring_mapp); 269 if (error) { 270 device_printf(dev, "bus_dmamem_alloc failed\n"); 271 goto fail; 272 } 273 274 error = bus_dmamap_load(vq->vq_ring_dmat, vq->vq_ring_mapp, 275 vq->vq_ring_mem, vq->vq_ring_size, virtqueue_ring_load_callback, 276 vq, BUS_DMA_NOWAIT); 277 if (error) { 278 device_printf(dev, "vq->vq_ring_mapp load failed\n"); 279 goto fail; 280 } 281 282 vq_ring_init(vq); 283 virtqueue_disable_intr(vq); 284 285 *vqp = vq; 286 287 fail: 288 if (error) 289 virtqueue_free(vq); 290 291 return (error); 292 } 293 294 static void 295 virtqueue_indirect_load_callback(void *arg, bus_dma_segment_t *segs, 296 int nsegs, int error) 297 { 298 struct vq_desc_extra *dxp; 299 300 if (error != 0) 301 return; 302 303 KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); 304 305 dxp = (struct vq_desc_extra *)arg; 306 dxp->indirect_paddr = segs[0].ds_addr; 307 } 308 309 static int 310 virtqueue_init_indirect(struct virtqueue *vq, int indirect_size) 311 { 312 device_t dev; 313 struct vq_desc_extra *dxp; 314 int i, size; 315 int error; 316 int align; 317 318 dev = vq->vq_dev; 319 320 if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) { 321 /* 322 * Indirect descriptors requested by the driver but not 323 * negotiated. Return zero to keep the initialization 324 * going: we'll run fine without. 
325 */ 326 if (bootverbose) 327 device_printf(dev, "virtqueue %d (%s) requested " 328 "indirect descriptors but not negotiated\n", 329 vq->vq_queue_index, vq->vq_name); 330 return (0); 331 } 332 333 size = indirect_size * sizeof(struct vring_desc); 334 vq->vq_max_indirect_size = indirect_size; 335 vq->vq_indirect_mem_size = size; 336 vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT; 337 338 mtx_init(&vq->vq_indirect_mtx, device_get_nameunit(dev), 339 "VirtIO Indirect Queue Lock", MTX_DEF); 340 341 align = size; 342 error = bus_dma_tag_create( 343 bus_get_dma_tag(dev), /* parent */ 344 roundup_pow_of_two(align), /* alignment */ 345 0, /* boundary */ 346 BUS_SPACE_MAXADDR, /* lowaddr */ 347 BUS_SPACE_MAXADDR, /* highaddr */ 348 NULL, NULL, /* filter, filterarg */ 349 size, /* max request size */ 350 1, /* max # segments */ 351 size, /* maxsegsize */ 352 BUS_DMA_COHERENT, /* flags */ 353 busdma_lock_mutex, /* lockfunc */ 354 &vq->vq_indirect_mtx, /* lockarg */ 355 &vq->vq_indirect_dmat); 356 if (error) { 357 device_printf(dev, "cannot create indirect bus_dma_tag\n"); 358 return (error); 359 } 360 361 #ifdef __powerpc__ 362 /* 363 * Virtio uses physical addresses rather than bus addresses, so we 364 * need to ask busdma to skip the iommu physical->bus mapping. At 365 * present, this is only a thing on the powerpc architectures. 
366 */ 367 bus_dma_tag_set_iommu(vq->vq_indirect_dmat, NULL, NULL); 368 #endif 369 370 for (i = 0; i < vq->vq_nentries; i++) { 371 dxp = &vq->vq_descx[i]; 372 373 error = bus_dmamem_alloc(vq->vq_indirect_dmat, 374 (void **)&dxp->indirect, 375 BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, 376 &dxp->mapp); 377 if (error) { 378 panic("dxp->mapp alloc failed\n"); 379 return (error); 380 } 381 382 error = bus_dmamap_load(vq->vq_indirect_dmat, dxp->mapp, 383 dxp->indirect, size, virtqueue_indirect_load_callback, dxp, 384 BUS_DMA_NOWAIT); 385 if (error) { 386 panic("dxp->mapp load failed\n"); 387 bus_dmamem_free(vq->vq_indirect_dmat, dxp->indirect, 388 dxp->mapp); 389 dxp->indirect = NULL; 390 return (error); 391 } 392 393 virtqueue_init_indirect_list(vq, dxp->indirect); 394 } 395 396 return (0); 397 } 398 399 static void 400 virtqueue_free_indirect(struct virtqueue *vq) 401 { 402 struct vq_desc_extra *dxp; 403 int i; 404 405 for (i = 0; i < vq->vq_nentries; i++) { 406 dxp = &vq->vq_descx[i]; 407 408 if (dxp->indirect == NULL) 409 break; 410 411 bus_dmamap_unload(vq->vq_indirect_dmat, dxp->mapp); 412 bus_dmamem_free(vq->vq_indirect_dmat, dxp->indirect, dxp->mapp); 413 dxp->indirect = NULL; 414 dxp->indirect_paddr = 0; 415 } 416 417 vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT; 418 vq->vq_indirect_mem_size = 0; 419 } 420 421 static void 422 virtqueue_init_indirect_list(struct virtqueue *vq, 423 struct vring_desc *indirect) 424 { 425 int i; 426 427 bzero(indirect, vq->vq_indirect_mem_size); 428 429 for (i = 0; i < vq->vq_max_indirect_size - 1; i++) 430 indirect[i].next = vq_gtoh16(vq, i + 1); 431 indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END); 432 } 433 434 int 435 virtqueue_reinit(struct virtqueue *vq, uint16_t size) 436 { 437 struct vq_desc_extra *dxp; 438 int i; 439 440 if (vq->vq_nentries != size) { 441 device_printf(vq->vq_dev, 442 "%s: '%s' changed size; old=%hu, new=%hu\n", 443 __func__, vq->vq_name, vq->vq_nentries, size); 444 return (EINVAL); 445 } 446 
	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
		if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
			virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

/*
 * Release all resources held by the virtqueue, including the virtqueue
 * structure itself.  Safe to call on a partially constructed queue
 * (the virtqueue_alloc() fail path does exactly that).
 */
void
virtqueue_free(struct virtqueue *vq)
{

	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
		virtqueue_free_indirect(vq);

	if (vq->vq_ring_mem != NULL) {
		bus_dmamap_unload(vq->vq_ring_dmat, vq->vq_ring_mapp);
		bus_dmamem_free(vq->vq_ring_dmat, vq->vq_ring_mem,
		    vq->vq_ring_mapp);
		vq->vq_ring_size = 0;
	}

	if (vq->vq_ring_dmat != NULL) {
		bus_dma_tag_destroy(vq->vq_ring_dmat);
	}

	free(vq, M_DEVBUF);
}

/* Physical address of the start of the ring memory. */
vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring_paddr);
}

/* Physical address of the descriptor table. */
vm_paddr_t
virtqueue_desc_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring.desc_paddr);
}

/* Physical address of the available (driver) ring. */
vm_paddr_t
virtqueue_avail_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring.avail_paddr);
}

/* Physical address of the used (device) ring. */
vm_paddr_t
virtqueue_used_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring.used_paddr);
}

/* Index of this queue on its device. */
uint16_t
virtqueue_index(struct virtqueue *vq)
{

	return (vq->vq_queue_index);
}

/* Number of entries in the ring. */
int
virtqueue_size(struct virtqueue *vq)
{

	return (vq->vq_nentries);
}

/* Number of descriptors currently free. */
int
virtqueue_nfree(struct virtqueue *vq)
{

	return (vq->vq_free_cnt);
}

/* True if no descriptors are outstanding. */
bool
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}

/* True if every descriptor is in use. */
bool
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}

/*
 * Notify the host of newly queued buffers, honoring the host's
 * notification-suppression hints, and reset the pending count.
 */
void
virtqueue_notify(struct virtqueue *vq)
{
	/* Ensure updated avail->idx is visible to host. */
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
#if defined(__i386__) || defined(__amd64__)
	mb();
#endif

	if (vq_ring_must_notify_host(vq))
		vq_ring_notify_host(vq);
	vq->vq_queued_cnt = 0;
}

/*
 * Number of used-ring entries the device has posted that we have
 * not yet consumed.
 */
int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	used_idx = vq_htog16(vq, vq->vq_ring.used->idx);

	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

/*
 * Interrupt filter: returns 1 (and disables further interrupts) if
 * there is pending used work, 0 for a spurious interrupt.
 */
int
virtqueue_intr_filter(struct virtqueue *vq)
{
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (0);

	virtqueue_disable_intr(vq);

	return (1);
}

/* Invoke the driver-supplied interrupt handler. */
void
virtqueue_intr(struct virtqueue *vq)
{

	vq->vq_intrhand(vq->vq_intrhand_arg);
}

/* Enable interrupts immediately (no postponement). */
int
virtqueue_enable_intr(struct virtqueue *vq)
{

	return (vq_ring_enable_interrupt(vq, 0));
}

/*
 * Enable interrupts, postponed by a fraction of the currently
 * outstanding descriptors according to 'hint'.  Only meaningful
 * with VIRTIO_RING_F_EVENT_IDX; otherwise equivalent to enable.
 */
int
virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint)
{
	uint16_t ndesc, avail_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	/* Descriptors outstanding between what we queued and consumed. */
	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx);

	switch (hint) {
	case VQ_POSTPONE_SHORT:
		ndesc = ndesc / 4;
		break;
	case VQ_POSTPONE_LONG:
		ndesc = (ndesc * 3) / 4;
		break;
	case VQ_POSTPONE_EMPTIED:
		break;
	}

	return (vq_ring_enable_interrupt(vq, ndesc));
}

/*
 * Note this is only considered a hint to the host.
 */
void
virtqueue_disable_intr(struct virtqueue *vq)
{

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		/*
		 * Push the event index far behind the used index so the
		 * device will not cross it for a full ring's worth of work.
		 */
		vring_used_event(&vq->vq_ring) = vq_gtoh16(vq,
		    vq->vq_used_cons_idx - vq->vq_nentries - 1);
		return;
	}

	vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}

/*
 * Enqueue a buffer described by 'sg': the first 'readable' segments are
 * device-readable, the next 'writable' are device-writable.  'cookie'
 * is returned by virtqueue_dequeue() when the device completes the
 * buffer.  Returns 0, EINVAL (empty sg), ENOSPC (ring full), or
 * EMSGSIZE (not enough free descriptors).
 */
int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);
	VQASSERT(vq,
	    needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
	    "too many segments to enqueue: %d, %d/%d", needed,
	    vq->vq_nentries, vq->vq_max_indirect_size);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);

	/* A single indirect descriptor can carry the whole chain. */
	if (vq_ring_use_indirect(vq, needed)) {
		vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
		return (0);
	} else if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	/* The free chain now starts past the descriptors we consumed. */
	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	vq_ring_update_avail(vq, head_idx);

	return (0);
}

/*
 * Dequeue one completed buffer, returning its cookie and, if 'len' is
 * non-NULL, the number of bytes the device wrote.  Returns NULL when
 * no completions are pending.
 */
void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_used_cons_idx ==
	    vq_htog16(vq, atomic_load_16(&vq->vq_ring.used->idx)))
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	/* Order the idx read above before reading the ring entry. */
	rmb();
	desc_idx = (uint16_t) vq_htog32(vq, uep->id);
	if (len != NULL)
		*len = vq_htog32(vq, uep->len);

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

/*
 * Busy-wait until a completion is available and return it.
 */
void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	while ((cookie = virtqueue_dequeue(vq, len)) == NULL) {
		cpu_spinwait();
	}

	return (cookie);
}

/*
 * Walk the descriptor bookkeeping starting at *last, returning the
 * next outstanding cookie (or NULL) and reclaiming its chain.  Used
 * by drivers to drain a queue before reset/teardown.
 */
void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep free count consistent.
			 */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

/*
 * Print the queue's complete state to the console; debugging aid.
 */
void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, virtqueue_nused(vq),
	    vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx,
	    vq_htog16(vq, vq->vq_ring.used->idx),
	    vq_htog16(vq, vring_used_event(&vq->vq_ring)),
	    vq_htog16(vq, vq->vq_ring.avail->flags),
	    vq_htog16(vq, vq->vq_ring.used->flags));
}

/*
 * Lay out the vring in the ring memory and chain all descriptors
 * into one free list terminated by VQ_RING_DESC_CHAIN_END.
 */
static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_ring_paddr, vq->vq_alignment);

	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = vq_gtoh16(vq, i + 1);
	vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}

static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx, avail_ring_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	avail_ring_idx = avail_idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx);

	/* Publish the ring entry before advancing the index. */
	wmb();
	vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1);

	/* Keep pending count until virtqueue_notify(). */
	vq->vq_queued_cnt++;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}

/*
 * Fill 'needed' descriptors in 'desc' from the sglist, starting at
 * head_idx and following the free chain.  Segments past 'readable'
 * are marked device-writable.  Returns the index following the last
 * descriptor used (the new free-chain head).
 */
static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	SDT_PROBE6(virtqueue, , enqueue_segments, entry, vq, desc, head_idx,
	    sg, readable, writable);

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	    i < needed;
	    i++, idx = vq_htog16(vq, dp->next), seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = vq_gtoh64(vq, seg->ss_paddr);
		dp->len = vq_gtoh32(vq, seg->ss_len);
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT);
		if (i >= readable)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE);
	}

	SDT_PROBE1(virtqueue, , enqueue_segments, return, idx);
	return (idx);
}

/*
 * Indirect descriptors are worthwhile only for chains of two or more
 * segments that fit in the preallocated indirect lists.
 */
static bool
vq_ring_use_indirect(struct virtqueue *vq, int needed)
{

	if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
		return (false);

	if (vq->vq_max_indirect_size < needed)
		return (false);

	if (needed < 2)
		return (false);

	return (true);
}

/*
 * Enqueue the sglist via a single main-ring descriptor pointing at
 * this slot's preallocated indirect descriptor list.
 */
static void
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
    struct sglist *sg, int readable, int writable)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx;

	needed =
	    readable + writable;
	VQASSERT(vq, needed <= vq->vq_max_indirect_size,
	    "enqueuing too many indirect descriptors");

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dp = &vq->vq_ring.desc[head_idx];
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	/* Only one main-ring descriptor is consumed for the whole chain. */
	dxp->ndescs = 1;

	dp->addr = vq_gtoh64(vq, dxp->indirect_paddr);
	dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc));
	dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT);

	/* Fill the indirect list itself starting at its index 0. */
	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
	    sg, readable, writable);

	bus_dmamap_sync(vq->vq_indirect_dmat, dxp->mapp, BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	vq->vq_desc_head_idx = vq_htog16(vq, dp->next);
	vq->vq_free_cnt--;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);

	vq_ring_update_avail(vq, head_idx);
}

static int
vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
{

	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed.
	 */
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) =
		    vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc);
	} else {
		vq->vq_ring.avail->flags &=
		    vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT);
	}

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
#if defined(__i386__) || defined(__amd64__)
	mb();
#endif

	/*
	 * Enough items may have already been consumed to meet our threshold
	 * since we last checked. Let our caller know so it processes the new
	 * entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}

/*
 * Decide whether the host must be notified, honoring either the
 * event-index mechanism or the VRING_USED_F_NO_NOTIFY flag.
 */
static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx, flags;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		new_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring));

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	flags = vq->vq_ring.used->flags;
	return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0);
}

/* Kick the device via the bus-specific notify method. */
static void
vq_ring_notify_host(struct virtqueue *vq)
{

	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index,
	    vq->vq_notify_offset);
}

/*
 * Return the descriptor chain headed by desc_idx to the free list,
 * updating vq_free_cnt and re-linking the old free chain behind it.
 */
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	/* An indirect chain occupies a single main-ring descriptor. */
	if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) {
		while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) {
			uint16_t next_idx = vq_htog16(vq, dp->next);
			VQ_RING_ASSERT_VALID_IDX(vq, next_idx);
			dp = &vq->vq_ring.desc[next_idx];
			dxp->ndescs--;
		}
	}

	VQASSERT(vq, dxp->ndescs == 0,
	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx);
	vq->vq_desc_head_idx = desc_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}