/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Implements the virtqueue interface as basically described
 * in the original VirtIO paper.
 */
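
/*
 * Illustrative sketch of typical driver usage. The softc, buffer, and
 * cookie names below are hypothetical; the virtqueue itself is obtained
 * through the bus-specific transport, which calls virtqueue_alloc():
 *
 *	struct sglist_seg segs[2];
 *	struct sglist sg;
 *	uint32_t len;
 *
 *	sglist_init(&sg, 2, segs);
 *	sglist_append(&sg, &req_hdr, sizeof(req_hdr));	(device-readable)
 *	sglist_append(&sg, resp_buf, resp_len);		(device-writable)
 *	error = virtqueue_enqueue(vq, cookie, &sg, 1, 1);
 *	if (error == 0)
 *		virtqueue_notify(vq);
 *	...
 *	(later, typically from the queue interrupt handler)
 *	cookie = virtqueue_dequeue(vq, &len);
 */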

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/cpu.h>
#include <machine/bus.h>
#include <machine/atomic.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/virtio_ring.h>

#include "virtio_bus_if.h"

struct virtqueue {
	device_t		 vq_dev;
	uint16_t		 vq_queue_index;
	uint16_t		 vq_nentries;
	uint32_t		 vq_flags;
#define	VIRTQUEUE_FLAG_MODERN	 0x0001
#define	VIRTQUEUE_FLAG_INDIRECT	 0x0002
#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0004

	int			 vq_max_indirect_size;
	bus_size_t		 vq_notify_offset;
	virtqueue_intr_t	*vq_intrhand;
	void			*vq_intrhand_arg;

	struct vring		 vq_ring;
	uint16_t		 vq_free_cnt;
	uint16_t		 vq_queued_cnt;
	/*
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t		 vq_desc_head_idx;
	/*
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t		 vq_used_cons_idx;

	void			*vq_ring_mem;
	int			 vq_indirect_mem_size;
	int			 vq_alignment;
	int			 vq_ring_size;
	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];

	struct vq_desc_extra {
		void		  *cookie;
		struct vring_desc *indirect;
		vm_paddr_t	   indirect_paddr;
		uint16_t	   ndescs;
	} vq_descx[0];
};

/*
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

#define VQASSERT(_vq, _exp, _msg, ...)				\
    KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name,	\
	##__VA_ARGS__))

#define VQ_RING_ASSERT_VALID_IDX(_vq, _idx)			\
    VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,		\
	"invalid ring index: %d, max: %d", (_idx),		\
	(_vq)->vq_nentries)

#define VQ_RING_ASSERT_CHAIN_TERM(_vq)				\
    VQASSERT((_vq), (_vq)->vq_desc_head_idx ==			\
	VQ_RING_DESC_CHAIN_END,	"full ring terminated "		\
	"incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)

static int	virtqueue_init_indirect(struct virtqueue *vq, int);
static void	virtqueue_free_indirect(struct virtqueue *vq);
static void	virtqueue_init_indirect_list(struct virtqueue *,
		    struct vring_desc *);

static void	vq_ring_init(struct virtqueue *);
static void	vq_ring_update_avail(struct virtqueue *, uint16_t);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
		    struct vring_desc *, uint16_t, struct sglist *, int, int);
static int	vq_ring_use_indirect(struct virtqueue *, int);
static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
		    struct sglist *, int, int);
static int	vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
static int	vq_ring_must_notify_host(struct virtqueue *);
static void	vq_ring_notify_host(struct virtqueue *);
static void	vq_ring_free_chain(struct virtqueue *, uint16_t);

#define vq_modern(_vq)		(((_vq)->vq_flags & VIRTQUEUE_FLAG_MODERN) != 0)
#define vq_htog16(_vq, _val)	virtio_htog16(vq_modern(_vq), _val)
#define vq_htog32(_vq, _val)	virtio_htog32(vq_modern(_vq), _val)
#define vq_htog64(_vq, _val)	virtio_htog64(vq_modern(_vq), _val)
#define vq_gtoh16(_vq, _val)	virtio_gtoh16(vq_modern(_vq), _val)
#define vq_gtoh32(_vq, _val)	virtio_gtoh32(vq_modern(_vq), _val)
#define vq_gtoh64(_vq, _val)	virtio_gtoh64(vq_modern(_vq), _val)

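/*
 * Allocate and initialize a virtqueue: validate the requested size,
 * optionally set up the per-descriptor indirect tables, allocate the
 * physically contiguous ring memory, and link the free descriptor
 * chain. On success, returns zero and the new virtqueue in *vqp.
 */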
int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
    bus_size_t notify_offset, int align, vm_paddr_t highaddr,
    struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
		device_printf(dev, "virtqueue %d (%s) requested too many "
		    "indirect descriptors: %d, max %d\n",
		    queue, info->vqai_name, info->vqai_maxindirsz,
		    VIRTIO_MAX_INDIRECT);
		return (EINVAL);
	}

	vq = malloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_notify_offset = notify_offset;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_MODERN;
	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	if (info->vqai_maxindirsz > 1) {
		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
		if (error)
			goto fail;
	}

	vq->vq_ring_size = round_page(vring_size(size, align));
	vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF,
	    M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0);
	if (vq->vq_ring_mem == NULL) {
		device_printf(dev,
		    "cannot allocate memory for virtqueue ring\n");
		error = ENOMEM;
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	if (error)
		virtqueue_free(vq);

	return (error);
}

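/*
 * Allocate a per-descriptor indirect descriptor table when
 * VIRTIO_RING_F_INDIRECT_DESC was negotiated. If the feature is not
 * available, this quietly succeeds and the queue falls back to using
 * direct descriptors only.
 */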
static int
virtqueue_init_indirect(struct virtqueue *vq, int indirect_size)
{
	device_t dev;
	struct vq_desc_extra *dxp;
	int i, size;

	dev = vq->vq_dev;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
		/*
		 * Indirect descriptors requested by the driver but not
		 * negotiated. Return zero to keep the initialization
		 * going: we'll run fine without.
		 */
		if (bootverbose)
			device_printf(dev, "virtqueue %d (%s) requested "
			    "indirect descriptors but not negotiated\n",
			    vq->vq_queue_index, vq->vq_name);
		return (0);
	}

	size = indirect_size * sizeof(struct vring_desc);
	vq->vq_max_indirect_size = indirect_size;
	vq->vq_indirect_mem_size = size;
	vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		dxp->indirect = malloc(size, M_DEVBUF, M_NOWAIT);
		if (dxp->indirect == NULL) {
			device_printf(dev, "cannot allocate indirect list\n");
			return (ENOMEM);
		}

		dxp->indirect_paddr = vtophys(dxp->indirect);
		virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	return (0);
}

static void
virtqueue_free_indirect(struct virtqueue *vq)
{
	struct vq_desc_extra *dxp;
	int i;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		if (dxp->indirect == NULL)
			break;

		free(dxp->indirect, M_DEVBUF);
		dxp->indirect = NULL;
		dxp->indirect_paddr = 0;
	}

	vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT;
	vq->vq_indirect_mem_size = 0;
}

static void
virtqueue_init_indirect_list(struct virtqueue *vq,
    struct vring_desc *indirect)
{
	int i;

	bzero(indirect, vq->vq_indirect_mem_size);

	for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
		indirect[i].next = vq_gtoh16(vq, i + 1);
	indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
}

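/*
 * Reset a virtqueue to its freshly allocated state, typically after
 * the device has been reset. The size must match the original
 * allocation; any entries still outstanding are reported as leaked.
 */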
int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
		if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
			virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

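/*
 * Release all resources held by a virtqueue: the indirect descriptor
 * tables (if any), the contiguous ring memory, and the virtqueue
 * structure itself.
 */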
void
virtqueue_free(struct virtqueue *vq)
{

	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
		virtqueue_free_indirect(vq);

	if (vq->vq_ring_mem != NULL) {
		contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF);
		vq->vq_ring_size = 0;
		vq->vq_ring_mem = NULL;
	}

	free(vq, M_DEVBUF);
}

vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring_mem));
}

vm_paddr_t
virtqueue_desc_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring.desc));
}

vm_paddr_t
virtqueue_avail_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring.avail));
}

vm_paddr_t
virtqueue_used_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring.used));
}

uint16_t
virtqueue_index(struct virtqueue *vq)
{

	return (vq->vq_queue_index);
}

int
virtqueue_size(struct virtqueue *vq)
{

	return (vq->vq_nentries);
}

int
virtqueue_nfree(struct virtqueue *vq)
{

	return (vq->vq_free_cnt);
}

int
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}

int
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}

void
virtqueue_notify(struct virtqueue *vq)
{

	/* Ensure updated avail->idx is visible to host. */
	mb();

	if (vq_ring_must_notify_host(vq))
		vq_ring_notify_host(vq);
	vq->vq_queued_cnt = 0;
}

int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	used_idx = vq_htog16(vq, vq->vq_ring.used->idx);

	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

int
virtqueue_intr_filter(struct virtqueue *vq)
{

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (0);

	virtqueue_disable_intr(vq);

	return (1);
}

void
virtqueue_intr(struct virtqueue *vq)
{

	vq->vq_intrhand(vq->vq_intrhand_arg);
}

int
virtqueue_enable_intr(struct virtqueue *vq)
{

	return (vq_ring_enable_interrupt(vq, 0));
}

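/*
 * Re-enable interrupts while asking the host, via the used event
 * index, to delay the next interrupt until roughly a quarter, three
 * quarters, or all of the outstanding descriptors have been consumed,
 * depending on the hint. Returns 1 if enough used entries are already
 * pending and the caller should process the queue instead of waiting.
 */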
int
virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint)
{
	uint16_t ndesc, avail_idx;

	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx);

	switch (hint) {
	case VQ_POSTPONE_SHORT:
		ndesc = ndesc / 4;
		break;
	case VQ_POSTPONE_LONG:
		ndesc = (ndesc * 3) / 4;
		break;
	case VQ_POSTPONE_EMPTIED:
		break;
	}

	return (vq_ring_enable_interrupt(vq, ndesc));
}

/*
 * Note this is only considered a hint to the host.
 */
void
virtqueue_disable_intr(struct virtqueue *vq)
{

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) = vq_gtoh16(vq,
		    vq->vq_used_cons_idx - vq->vq_nentries - 1);
		return;
	}

	vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT);
}

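/*
 * Enqueue a buffer described by the scatter/gather list onto the
 * virtqueue. The first 'readable' segments are device-readable and the
 * remaining 'writable' segments are device-writable. The cookie is
 * returned by virtqueue_dequeue() once the host has finished with the
 * buffer. An indirect descriptor is used when possible; otherwise the
 * chain is linked directly into the descriptor table.
 */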
int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);
	VQASSERT(vq,
	    needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
	    "too many segments to enqueue: %d, %d/%d", needed,
	    vq->vq_nentries, vq->vq_max_indirect_size);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);

	if (vq_ring_use_indirect(vq, needed)) {
		vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
		return (0);
	} else if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	vq_ring_update_avail(vq, head_idx);

	return (0);
}

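/*
 * Dequeue the next completed buffer, if any. Returns the cookie that
 * was passed to virtqueue_enqueue() and, when len is not NULL, the
 * number of bytes the host wrote into the writable segments. Returns
 * NULL when no completed buffers are pending.
 */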
void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	rmb();
	desc_idx = (uint16_t) vq_htog32(vq, uep->id);
	if (len != NULL)
		*len = vq_htog32(vq, uep->len);

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

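/*
 * Busy-wait until a completed buffer is available and return its
 * cookie, polling the transport between attempts.
 */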
void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	VIRTIO_BUS_POLL(vq->vq_dev);
	while ((cookie = virtqueue_dequeue(vq, len)) == NULL) {
		cpu_spinwait();
		VIRTIO_BUS_POLL(vq->vq_dev);
	}

	return (cookie);
}

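/*
 * Walk the descriptor bookkeeping starting at *last and return the
 * next outstanding cookie, freeing its descriptor chain. Used when
 * tearing down a queue to reclaim buffers the host never completed.
 * Returns NULL once all entries have been visited; *last is updated
 * so the caller can resume the walk.
 */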
void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep free count consistent. */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, virtqueue_nused(vq),
	    vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx,
	    vq_htog16(vq, vq->vq_ring.used->idx),
	    vq_htog16(vq, vring_used_event(&vq->vq_ring)),
	    vq_htog16(vq, vq->vq_ring.avail->flags),
	    vq_htog16(vq, vq->vq_ring.used->flags));
}

static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_alignment);

	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = vq_gtoh16(vq, i + 1);
	vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
}

static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx, avail_ring_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	avail_ring_idx = avail_idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx);

	wmb();
	vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1);

	/* Keep pending count until virtqueue_notify(). */
	vq->vq_queued_cnt++;
}

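/*
 * Fill descriptor entries for each scatter/gather segment, starting at
 * head_idx and following the free chain. The first 'readable' segments
 * are left device-readable; the rest are marked VRING_DESC_F_WRITE.
 * Returns the index of the descriptor following the last one used,
 * which becomes the new head of the free chain.
 */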
static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	     i < needed;
	     i++, idx = vq_htog16(vq, dp->next), seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = vq_gtoh64(vq, seg->ss_paddr);
		dp->len = vq_gtoh32(vq, seg->ss_len);
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT);
		if (i >= readable)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE);
	}

	return (idx);
}

static int
vq_ring_use_indirect(struct virtqueue *vq, int needed)
{

	if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
		return (0);

	if (vq->vq_max_indirect_size < needed)
		return (0);

	if (needed < 2)
		return (0);

	return (1);
}

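/*
 * Enqueue a chain using a single indirect descriptor: the segments are
 * written into this entry's preallocated indirect table and the ring
 * descriptor points at that table, so only one slot in the descriptor
 * table is consumed regardless of the segment count.
 */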
static void
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
    struct sglist *sg, int readable, int writable)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx;

	needed = readable + writable;
	VQASSERT(vq, needed <= vq->vq_max_indirect_size,
	    "enqueuing too many indirect descriptors");

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dp = &vq->vq_ring.desc[head_idx];
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = 1;

	dp->addr = vq_gtoh64(vq, dxp->indirect_paddr);
	dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc));
	dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT);

	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
	    sg, readable, writable);

	vq->vq_desc_head_idx = vq_htog16(vq, dp->next);
	vq->vq_free_cnt--;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);

	vq_ring_update_avail(vq, head_idx);
}

static int
vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
{

	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed.
	 */
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) =
		    vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc);
	} else {
		vq->vq_ring.avail->flags &=
		    vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT);
	}

	mb();

	/*
	 * Enough items may have already been consumed to meet our threshold
	 * since we last checked. Let our caller know so it processes the new
	 * entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}

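/*
 * Decide whether the host needs to be notified about newly available
 * descriptors. With VIRTIO_RING_F_EVENT_IDX the avail event index
 * published by the host is consulted; otherwise the host suppresses
 * notifications by setting VRING_USED_F_NO_NOTIFY.
 */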
static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx, flags;

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		new_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring));

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	flags = vq->vq_ring.used->flags;
	return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0);
}

static void
vq_ring_notify_host(struct virtqueue *vq)
{

	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index,
	    vq->vq_notify_offset);
}

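/*
 * Return the descriptor chain starting at desc_idx to the free list.
 * A chain built with an indirect descriptor occupies a single entry,
 * so only direct chains are walked via their next links. The freed
 * chain becomes the new head of the free list, with the previous free
 * chain appended behind it.
 */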
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) {
		while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) {
			uint16_t next_idx = vq_htog16(vq, dp->next);
			VQ_RING_ASSERT_VALID_IDX(vq, next_idx);
			dp = &vq->vq_ring.desc[next_idx];
			dxp->ndescs--;
		}
	}

	VQASSERT(vq, dxp->ndescs == 0,
	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);

	/*
	 * We must append the existing free chain, if any, to the end of
	 * the newly freed chain. If the virtqueue was completely used,
	 * then the head would be VQ_RING_DESC_CHAIN_END (asserted above).
	 */
	dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx);
	vq->vq_desc_head_idx = desc_idx;
}