xref: /linux/drivers/virtio/virtio_ring.c (revision 68a052239fc4b351e961f698b824f7654a346091)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17 
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)				\
21 	do {							\
22 		dev_err(&(_vq)->vq.vdev->dev,			\
23 			"%s:"fmt, (_vq)->vq.name, ##args);	\
24 		BUG();						\
25 	} while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)						\
28 	do {							\
29 		if ((_vq)->in_use)				\
30 			panic("%s:in_use = %i\n",		\
31 			      (_vq)->vq.name, (_vq)->in_use);	\
32 		(_vq)->in_use = __LINE__;			\
33 	} while (0)
34 #define END_USE(_vq) \
35 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
36 #define LAST_ADD_TIME_UPDATE(_vq)				\
37 	do {							\
38 		ktime_t now = ktime_get();			\
39 								\
40 		/* No kick or get, with .1 second between?  Warn. */ \
41 		if ((_vq)->last_add_time_valid)			\
42 			WARN_ON(ktime_to_ms(ktime_sub(now,	\
43 				(_vq)->last_add_time)) > 100);	\
44 		(_vq)->last_add_time = now;			\
45 		(_vq)->last_add_time_valid = true;		\
46 	} while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)				\
48 	do {							\
49 		if ((_vq)->last_add_time_valid) {		\
50 			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51 				      (_vq)->last_add_time)) > 100); \
52 		}						\
53 	} while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)				\
55 	((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)				\
58 	do {							\
59 		dev_err(&(_vq)->vq.vdev->dev,			\
60 			"%s:"fmt, (_vq)->vq.name, ##args);	\
61 		(_vq)->broken = true;				\
62 	} while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
69 
70 struct vring_desc_state_split {
71 	void *data;			/* Data for callback. */
72 
73 	/* Indirect desc table and extra table, if any. The two are allocated
74 	 * together, so we don't put extra pressure on the memory allocator.
75 	 */
76 	struct vring_desc *indir_desc;
77 };
78 
79 struct vring_desc_state_packed {
80 	void *data;			/* Data for callback. */
81 
82 	/* Indirect desc table and extra table, if any. The two are allocated
83 	 * together, so we don't put extra pressure on the memory allocator.
84 	 */
85 	struct vring_packed_desc *indir_desc;
86 	u16 num;			/* Descriptor list length. */
87 	u16 last;			/* The last desc state in a list. */
88 };
89 
90 struct vring_desc_extra {
91 	dma_addr_t addr;		/* Descriptor DMA addr. */
92 	u32 len;			/* Descriptor length. */
93 	u16 flags;			/* Descriptor flags. */
94 	u16 next;			/* The next desc state in a list. */
95 };
96 
97 struct vring_virtqueue_split {
98 	/* Actual memory layout for this queue. */
99 	struct vring vring;
100 
101 	/* Last written value to avail->flags */
102 	u16 avail_flags_shadow;
103 
104 	/*
105 	 * Last written value to avail->idx in
106 	 * guest byte order.
107 	 */
108 	u16 avail_idx_shadow;
109 
110 	/* Per-descriptor state. */
111 	struct vring_desc_state_split *desc_state;
112 	struct vring_desc_extra *desc_extra;
113 
114 	/* DMA address and size information */
115 	dma_addr_t queue_dma_addr;
116 	size_t queue_size_in_bytes;
117 
118 	/*
119 	 * The creation-time parameters are kept here so they can be reused
120 	 * when a new vring is allocated (e.g. on queue resize).
121 	 */
122 	u32 vring_align;
123 	bool may_reduce_num;
124 };
125 
126 struct vring_virtqueue_packed {
127 	/* Actual memory layout for this queue. */
128 	struct {
129 		unsigned int num;
130 		struct vring_packed_desc *desc;
131 		struct vring_packed_desc_event *driver;
132 		struct vring_packed_desc_event *device;
133 	} vring;
134 
135 	/* Driver ring wrap counter. */
136 	bool avail_wrap_counter;
137 
138 	/* Avail used flags. */
139 	u16 avail_used_flags;
140 
141 	/* Index of the next avail descriptor. */
142 	u16 next_avail_idx;
143 
144 	/*
145 	 * Last written value to driver->flags in
146 	 * guest byte order.
147 	 */
148 	u16 event_flags_shadow;
149 
150 	/* Per-descriptor state. */
151 	struct vring_desc_state_packed *desc_state;
152 	struct vring_desc_extra *desc_extra;
153 
154 	/* DMA address and size information */
155 	dma_addr_t ring_dma_addr;
156 	dma_addr_t driver_event_dma_addr;
157 	dma_addr_t device_event_dma_addr;
158 	size_t ring_size_in_bytes;
159 	size_t event_size_in_bytes;
160 };
161 
162 struct vring_virtqueue {
163 	struct virtqueue vq;
164 
165 	/* Is this a packed ring? */
166 	bool packed_ring;
167 
168 	/* Is DMA API used? */
169 	bool use_map_api;
170 
171 	/* Can we use weak barriers? */
172 	bool weak_barriers;
173 
174 	/* Other side has made a mess, don't try any more. */
175 	bool broken;
176 
177 	/* Host supports indirect buffers */
178 	bool indirect;
179 
180 	/* Host publishes avail event idx */
181 	bool event;
182 
183 	/* Head of free buffer list. */
184 	unsigned int free_head;
185 	/* Number we've added since last sync. */
186 	unsigned int num_added;
187 
188 	/* Last used index we've seen.
189 	 * For the split ring, it just contains the last used index.
190 	 * For the packed ring:
191 	 * bits below VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
192 	 * bit VRING_PACKED_EVENT_F_WRAP_CTR contains the used wrap counter.
193 	 */
194 	u16 last_used_idx;
195 
196 	/* Hint for event idx: already triggered, so no need to disable. */
197 	bool event_triggered;
198 
199 	union {
200 		/* Available for split ring */
201 		struct vring_virtqueue_split split;
202 
203 		/* Available for packed ring */
204 		struct vring_virtqueue_packed packed;
205 	};
206 
207 	/* How to notify other side. FIXME: commonalize hcalls! */
208 	bool (*notify)(struct virtqueue *vq);
209 
210 	/* DMA, allocation, and size information */
211 	bool we_own_ring;
212 
213 	union virtio_map map;
214 
215 #ifdef DEBUG
216 	/* They're supposed to lock for us. */
217 	unsigned int in_use;
218 
219 	/* Figure out if their kicks are too delayed. */
220 	bool last_add_time_valid;
221 	ktime_t last_add_time;
222 #endif
223 };
224 
225 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
226 static void vring_free(struct virtqueue *_vq);
227 
228 /*
229  * Helpers.
230  */
231 
232 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
233 
234 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
235 				   unsigned int total_sg)
236 {
237 	/*
238 	 * If the host supports indirect descriptor tables, and we have multiple
239 	 * buffers, then go indirect. FIXME: tune this threshold
240 	 */
241 	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
242 }
243 
244 /*
245  * Modern virtio devices have feature bits to specify whether they need a
246  * quirk and bypass the IOMMU. If not there, just use the DMA API.
247  *
248  * If there, the interaction between virtio and DMA API is messy.
249  *
250  * On most systems with virtio, physical addresses match bus addresses,
251  * and it doesn't particularly matter whether we use the DMA API.
252  *
253  * On some systems, including Xen and any system with a physical device
254  * that speaks virtio behind a physical IOMMU, we must use the DMA API
255  * for virtio DMA to work at all.
256  *
257  * On other systems, including SPARC and PPC64, virtio-pci devices are
258  * enumerated as though they are behind an IOMMU, but the virtio host
259  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
260  * there or somehow map everything as the identity.
261  *
262  * For the time being, we preserve historic behavior and bypass the DMA
263  * API.
264  *
265  * TODO: install a per-device DMA ops structure that does the right thing
266  * taking into account all the above quirks, and use the DMA API
267  * unconditionally on data path.
268  */
269 
270 static bool vring_use_map_api(const struct virtio_device *vdev)
271 {
272 	if (!virtio_has_dma_quirk(vdev))
273 		return true;
274 
275 	/* Otherwise, we are left to guess. */
276 	/*
277 	 * In theory, it's possible to have a buggy QEMU-supplied
278 	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
279 	 * such a configuration, virtio has never worked and will
280 	 * not work without an even larger kludge.  Instead, enable
281 	 * the DMA API if we're a Xen guest, which at least allows
282 	 * all of the sensible Xen configurations to work correctly.
283 	 */
284 	if (xen_domain())
285 		return true;
286 
287 	return false;
288 }
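
/*
 * Editorial note (not part of the original source): virtio_has_dma_quirk()
 * simply reports the absence of VIRTIO_F_ACCESS_PLATFORM.  A device that
 * negotiated VIRTIO_F_ACCESS_PLATFORM therefore has no quirk and always
 * takes the mapping-API path above; the xen_domain() special case only
 * matters for legacy devices that still claim direct physical addressing.
 */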
289 
290 static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
291 				    const struct vring_desc_extra *extra)
292 {
293 	return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR);
294 }
295 
296 size_t virtio_max_dma_size(const struct virtio_device *vdev)
297 {
298 	size_t max_segment_size = SIZE_MAX;
299 
300 	if (vring_use_map_api(vdev)) {
301 		if (vdev->map) {
302 			max_segment_size =
303 				vdev->map->max_mapping_size(vdev->vmap);
304 		} else
305 			max_segment_size =
306 				dma_max_mapping_size(vdev->dev.parent);
307 	}
308 
309 	return max_segment_size;
310 }
311 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
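
/*
 * Usage sketch (hypothetical caller, not part of this file): a driver that
 * builds large requests can clamp its per-segment size to what the mapping
 * layer supports, for example:
 *
 *	seg_size = min_t(size_t, virtio_max_dma_size(vdev), DRIVER_SEG_MAX);
 *
 * where DRIVER_SEG_MAX is an assumed driver-specific constant.
 */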
312 
313 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
314 			       dma_addr_t *map_handle, gfp_t flag,
315 			       union virtio_map map)
316 {
317 	if (vring_use_map_api(vdev)) {
318 		return virtqueue_map_alloc_coherent(vdev, map, size,
319 						    map_handle, flag);
320 	} else {
321 		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
322 
323 		if (queue) {
324 			phys_addr_t phys_addr = virt_to_phys(queue);
325 			*map_handle = (dma_addr_t)phys_addr;
326 
327 			/*
328 			 * Sanity check: make sure we didn't truncate
329 			 * the address.  The only arches I can find that
330 			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
331 			 * are certain non-highmem MIPS and x86
332 			 * configurations, but these configurations
333 			 * should never allocate physical pages above 32
334 			 * bits, so this is fine.  Just in case, throw a
335 			 * warning and abort if we end up with an
336 			 * unrepresentable address.
337 			 */
338 			if (WARN_ON_ONCE(*map_handle != phys_addr)) {
339 				free_pages_exact(queue, PAGE_ALIGN(size));
340 				return NULL;
341 			}
342 		}
343 		return queue;
344 	}
345 }
346 
347 static void vring_free_queue(struct virtio_device *vdev, size_t size,
348 			     void *queue, dma_addr_t map_handle,
349 			     union virtio_map map)
350 {
351 	if (vring_use_map_api(vdev))
352 		virtqueue_map_free_coherent(vdev, map, size,
353 					    queue, map_handle);
354 	else
355 		free_pages_exact(queue, PAGE_ALIGN(size));
356 }
357 
358 /*
359  * The DMA ops on various arches are rather gnarly right now, and
360  * making all of the arch DMA ops work on the vring device itself
361  * is a mess.
362  */
363 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
364 {
365 	return vq->map.dma_dev;
366 }
367 
368 static int vring_mapping_error(const struct vring_virtqueue *vq,
369 			       dma_addr_t addr)
370 {
371 	struct virtio_device *vdev = vq->vq.vdev;
372 
373 	if (!vq->use_map_api)
374 		return 0;
375 
376 	if (vdev->map)
377 		return vdev->map->mapping_error(vq->map, addr);
378 	else
379 		return dma_mapping_error(vring_dma_dev(vq), addr);
380 }
381 
382 /* Map one sg entry. */
383 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
384 			    enum dma_data_direction direction, dma_addr_t *addr,
385 			    u32 *len, bool premapped)
386 {
387 	if (premapped) {
388 		*addr = sg_dma_address(sg);
389 		*len = sg_dma_len(sg);
390 		return 0;
391 	}
392 
393 	*len = sg->length;
394 
395 	if (!vq->use_map_api) {
396 		/*
397 		 * If DMA is not used, KMSAN doesn't know that the scatterlist
398 		 * is initialized by the hardware. Explicitly check/unpoison it
399 		 * depending on the direction.
400 		 */
401 		kmsan_handle_dma(sg_phys(sg), sg->length, direction);
402 		*addr = (dma_addr_t)sg_phys(sg);
403 		return 0;
404 	}
405 
406 	/*
407 	 * We can't use dma_map_sg, because we don't use scatterlists in
408 	 * the way it expects (we don't guarantee that the scatterlist
409 	 * will exist for the lifetime of the mapping).
410 	 */
411 	*addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg),
412 					 sg->offset, sg->length,
413 					 direction, 0);
414 
415 	if (vring_mapping_error(vq, *addr))
416 		return -ENOMEM;
417 
418 	return 0;
419 }
420 
421 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
422 				   void *cpu_addr, size_t size,
423 				   enum dma_data_direction direction)
424 {
425 	if (!vq->use_map_api)
426 		return (dma_addr_t)virt_to_phys(cpu_addr);
427 
428 	return virtqueue_map_single_attrs(&vq->vq, cpu_addr,
429 					  size, direction, 0);
430 }
431 
432 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
433 {
434 	vq->vq.num_free = num;
435 
436 	if (vq->packed_ring)
437 		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
438 	else
439 		vq->last_used_idx = 0;
440 
441 	vq->event_triggered = false;
442 	vq->num_added = 0;
443 
444 #ifdef DEBUG
445 	vq->in_use = false;
446 	vq->last_add_time_valid = false;
447 #endif
448 }
449 
450 
451 /*
452  * Split ring specific functions - *_split().
453  */
454 
455 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
456 					  struct vring_desc_extra *extra)
457 {
458 	u16 flags;
459 
460 	flags = extra->flags;
461 
462 	if (flags & VRING_DESC_F_INDIRECT) {
463 		if (!vq->use_map_api)
464 			goto out;
465 	} else if (!vring_need_unmap_buffer(vq, extra))
466 		goto out;
467 
468 	virtqueue_unmap_page_attrs(&vq->vq,
469 				   extra->addr,
470 				   extra->len,
471 				   (flags & VRING_DESC_F_WRITE) ?
472 				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
473 				   0);
474 
475 out:
476 	return extra->next;
477 }
478 
479 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
480 					       unsigned int total_sg,
481 					       gfp_t gfp)
482 {
483 	struct vring_desc_extra *extra;
484 	struct vring_desc *desc;
485 	unsigned int i, size;
486 
487 	/*
488 	 * We require lowmem mappings for the descriptors because
489 	 * otherwise virt_to_phys will give us bogus addresses in the
490 	 * virtqueue.
491 	 */
492 	gfp &= ~__GFP_HIGHMEM;
493 
494 	size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg;
495 
496 	desc = kmalloc(size, gfp);
497 	if (!desc)
498 		return NULL;
499 
500 	extra = (struct vring_desc_extra *)&desc[total_sg];
501 
502 	for (i = 0; i < total_sg; i++)
503 		extra[i].next = i + 1;
504 
505 	return desc;
506 }
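
/*
 * Layout sketch for the allocation made above (editorial illustration):
 * one kmalloc() holds the indirect descriptor table immediately followed
 * by its shadow vring_desc_extra array, e.g. for total_sg == 3:
 *
 *	desc[0] desc[1] desc[2] | extra[0] extra[1] extra[2]
 *
 * extra[] is reached by casting &desc[total_sg], and extra[i].next is
 * pre-chained to i + 1 so the add path can walk the table in order.
 */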
507 
508 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
509 						    struct vring_desc *desc,
510 						    struct vring_desc_extra *extra,
511 						    unsigned int i,
512 						    dma_addr_t addr,
513 						    unsigned int len,
514 						    u16 flags, bool premapped)
515 {
516 	u16 next;
517 
518 	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
519 	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
520 	desc[i].len = cpu_to_virtio32(vq->vdev, len);
521 
522 	extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
523 	extra[i].len = len;
524 	extra[i].flags = flags;
525 
526 	next = extra[i].next;
527 
528 	desc[i].next = cpu_to_virtio16(vq->vdev, next);
529 
530 	return next;
531 }
532 
533 static inline int virtqueue_add_split(struct virtqueue *_vq,
534 				      struct scatterlist *sgs[],
535 				      unsigned int total_sg,
536 				      unsigned int out_sgs,
537 				      unsigned int in_sgs,
538 				      void *data,
539 				      void *ctx,
540 				      bool premapped,
541 				      gfp_t gfp)
542 {
543 	struct vring_virtqueue *vq = to_vvq(_vq);
544 	struct vring_desc_extra *extra;
545 	struct scatterlist *sg;
546 	struct vring_desc *desc;
547 	unsigned int i, n, avail, descs_used, prev, err_idx;
548 	int head;
549 	bool indirect;
550 
551 	START_USE(vq);
552 
553 	BUG_ON(data == NULL);
554 	BUG_ON(ctx && vq->indirect);
555 
556 	if (unlikely(vq->broken)) {
557 		END_USE(vq);
558 		return -EIO;
559 	}
560 
561 	LAST_ADD_TIME_UPDATE(vq);
562 
563 	BUG_ON(total_sg == 0);
564 
565 	head = vq->free_head;
566 
567 	if (virtqueue_use_indirect(vq, total_sg))
568 		desc = alloc_indirect_split(_vq, total_sg, gfp);
569 	else {
570 		desc = NULL;
571 		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
572 	}
573 
574 	if (desc) {
575 		/* Use a single buffer which doesn't continue */
576 		indirect = true;
577 		/* Set up rest to use this indirect table. */
578 		i = 0;
579 		descs_used = 1;
580 		extra = (struct vring_desc_extra *)&desc[total_sg];
581 	} else {
582 		indirect = false;
583 		desc = vq->split.vring.desc;
584 		extra = vq->split.desc_extra;
585 		i = head;
586 		descs_used = total_sg;
587 	}
588 
589 	if (unlikely(vq->vq.num_free < descs_used)) {
590 		pr_debug("Can't add buf len %i - avail = %i\n",
591 			 descs_used, vq->vq.num_free);
592 		/* FIXME: for historical reasons, we force a notify here if
593 		 * there are outgoing parts to the buffer.  Presumably the
594 		 * host should service the ring ASAP. */
595 		if (out_sgs)
596 			vq->notify(&vq->vq);
597 		if (indirect)
598 			kfree(desc);
599 		END_USE(vq);
600 		return -ENOSPC;
601 	}
602 
603 	for (n = 0; n < out_sgs; n++) {
604 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
605 			dma_addr_t addr;
606 			u32 len;
607 
608 			if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len, premapped))
609 				goto unmap_release;
610 
611 			prev = i;
612 			/* Note that we trust the indirect descriptor
613 			 * table since it uses streaming DMA mapping.
614 			 */
615 			i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len,
616 						     VRING_DESC_F_NEXT,
617 						     premapped);
618 		}
619 	}
620 	for (; n < (out_sgs + in_sgs); n++) {
621 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
622 			dma_addr_t addr;
623 			u32 len;
624 
625 			if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len, premapped))
626 				goto unmap_release;
627 
628 			prev = i;
629 			/* Note that we trust the indirect descriptor
630 			 * table since it uses streaming DMA mapping.
631 			 */
632 			i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len,
633 						     VRING_DESC_F_NEXT |
634 						     VRING_DESC_F_WRITE,
635 						     premapped);
636 		}
637 	}
638 	/* Last one doesn't continue. */
639 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
640 	if (!indirect && vring_need_unmap_buffer(vq, &extra[prev]))
641 		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
642 			~VRING_DESC_F_NEXT;
643 
644 	if (indirect) {
645 		/* Now that the indirect table is filled in, map it. */
646 		dma_addr_t addr = vring_map_single(
647 			vq, desc, total_sg * sizeof(struct vring_desc),
648 			DMA_TO_DEVICE);
649 		if (vring_mapping_error(vq, addr))
650 			goto unmap_release;
651 
652 		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
653 					 vq->split.desc_extra,
654 					 head, addr,
655 					 total_sg * sizeof(struct vring_desc),
656 					 VRING_DESC_F_INDIRECT, false);
657 	}
658 
659 	/* We're using some buffers from the free list. */
660 	vq->vq.num_free -= descs_used;
661 
662 	/* Update free pointer */
663 	if (indirect)
664 		vq->free_head = vq->split.desc_extra[head].next;
665 	else
666 		vq->free_head = i;
667 
668 	/* Store token and indirect buffer state. */
669 	vq->split.desc_state[head].data = data;
670 	if (indirect)
671 		vq->split.desc_state[head].indir_desc = desc;
672 	else
673 		vq->split.desc_state[head].indir_desc = ctx;
674 
675 	/* Put entry in available array (but don't update avail->idx until they
676 	 * do sync). */
677 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
678 	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
679 
680 	/* Descriptors and available array need to be set before we expose the
681 	 * new available array entries. */
682 	virtio_wmb(vq->weak_barriers);
683 	vq->split.avail_idx_shadow++;
684 	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
685 						vq->split.avail_idx_shadow);
686 	vq->num_added++;
687 
688 	pr_debug("Added buffer head %i to %p\n", head, vq);
689 	END_USE(vq);
690 
691 	/* This is very unlikely, but theoretically possible.  Kick
692 	 * just in case. */
693 	if (unlikely(vq->num_added == (1 << 16) - 1))
694 		virtqueue_kick(_vq);
695 
696 	return 0;
697 
698 unmap_release:
699 	err_idx = i;
700 
701 	if (indirect)
702 		i = 0;
703 	else
704 		i = head;
705 
706 	for (n = 0; n < total_sg; n++) {
707 		if (i == err_idx)
708 			break;
709 
710 		i = vring_unmap_one_split(vq, &extra[i]);
711 	}
712 
713 	if (indirect)
714 		kfree(desc);
715 
716 	END_USE(vq);
717 	return -ENOMEM;
718 }
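
/*
 * Editorial sketch, not used by this file: how a driver typically reaches
 * virtqueue_add_split() through the public API.  The name
 * example_send_split() and its arguments are hypothetical; error handling
 * is reduced to the minimum.
 */
static int __maybe_unused example_send_split(struct virtqueue *vq,
					     void *buf, unsigned int len)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, len);

	/* 'buf' is the token later handed back by virtqueue_get_buf(). */
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err)
		return err;

	/* Notify the device unless it has suppressed notifications. */
	virtqueue_kick(vq);
	return 0;
}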
719 
720 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
721 {
722 	struct vring_virtqueue *vq = to_vvq(_vq);
723 	u16 new, old;
724 	bool needs_kick;
725 
726 	START_USE(vq);
727 	/* We need to expose available array entries before checking avail
728 	 * event. */
729 	virtio_mb(vq->weak_barriers);
730 
731 	old = vq->split.avail_idx_shadow - vq->num_added;
732 	new = vq->split.avail_idx_shadow;
733 	vq->num_added = 0;
734 
735 	LAST_ADD_TIME_CHECK(vq);
736 	LAST_ADD_TIME_INVALID(vq);
737 
738 	if (vq->event) {
739 		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
740 					vring_avail_event(&vq->split.vring)),
741 					      new, old);
742 	} else {
743 		needs_kick = !(vq->split.vring.used->flags &
744 					cpu_to_virtio16(_vq->vdev,
745 						VRING_USED_F_NO_NOTIFY));
746 	}
747 	END_USE(vq);
748 	return needs_kick;
749 }
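
/*
 * Editorial worked example for the vring_need_event() test above (not part
 * of the original source).  With old = 10, new = 14 (four buffers added
 * since the last kick) and the device's avail event index at 12:
 *
 *	(u16)(new - event - 1) = 1  <  (u16)(new - old) = 4	-> kick
 *
 * Had the device parked the event index at 20, the left-hand side would
 * wrap to 65529 and no kick would be sent.  All arithmetic is modulo 2^16,
 * so the test also behaves correctly across index wrap-around.
 */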
750 
751 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
752 			     void **ctx)
753 {
754 	struct vring_desc_extra *extra;
755 	unsigned int i, j;
756 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
757 
758 	/* Clear data ptr. */
759 	vq->split.desc_state[head].data = NULL;
760 
761 	extra = vq->split.desc_extra;
762 
763 	/* Put back on free list: unmap first-level descriptors and find end */
764 	i = head;
765 
766 	while (vq->split.vring.desc[i].flags & nextflag) {
767 		vring_unmap_one_split(vq, &extra[i]);
768 		i = vq->split.desc_extra[i].next;
769 		vq->vq.num_free++;
770 	}
771 
772 	vring_unmap_one_split(vq, &extra[i]);
773 	vq->split.desc_extra[i].next = vq->free_head;
774 	vq->free_head = head;
775 
776 	/* Plus final descriptor */
777 	vq->vq.num_free++;
778 
779 	if (vq->indirect) {
780 		struct vring_desc *indir_desc =
781 				vq->split.desc_state[head].indir_desc;
782 		u32 len, num;
783 
784 		/* Free the indirect table, if any, now that it's unmapped. */
785 		if (!indir_desc)
786 			return;
787 		len = vq->split.desc_extra[head].len;
788 
789 		BUG_ON(!(vq->split.desc_extra[head].flags &
790 				VRING_DESC_F_INDIRECT));
791 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
792 
793 		num = len / sizeof(struct vring_desc);
794 
795 		extra = (struct vring_desc_extra *)&indir_desc[num];
796 
797 		if (vq->use_map_api) {
798 			for (j = 0; j < num; j++)
799 				vring_unmap_one_split(vq, &extra[j]);
800 		}
801 
802 		kfree(indir_desc);
803 		vq->split.desc_state[head].indir_desc = NULL;
804 	} else if (ctx) {
805 		*ctx = vq->split.desc_state[head].indir_desc;
806 	}
807 }
808 
809 static bool more_used_split(const struct vring_virtqueue *vq)
810 {
811 	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
812 			vq->split.vring.used->idx);
813 }
814 
815 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
816 					 unsigned int *len,
817 					 void **ctx)
818 {
819 	struct vring_virtqueue *vq = to_vvq(_vq);
820 	void *ret;
821 	unsigned int i;
822 	u16 last_used;
823 
824 	START_USE(vq);
825 
826 	if (unlikely(vq->broken)) {
827 		END_USE(vq);
828 		return NULL;
829 	}
830 
831 	if (!more_used_split(vq)) {
832 		pr_debug("No more buffers in queue\n");
833 		END_USE(vq);
834 		return NULL;
835 	}
836 
837 	/* Only get used array entries after they have been exposed by host. */
838 	virtio_rmb(vq->weak_barriers);
839 
840 	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
841 	i = virtio32_to_cpu(_vq->vdev,
842 			vq->split.vring.used->ring[last_used].id);
843 	*len = virtio32_to_cpu(_vq->vdev,
844 			vq->split.vring.used->ring[last_used].len);
845 
846 	if (unlikely(i >= vq->split.vring.num)) {
847 		BAD_RING(vq, "id %u out of range\n", i);
848 		return NULL;
849 	}
850 	if (unlikely(!vq->split.desc_state[i].data)) {
851 		BAD_RING(vq, "id %u is not a head!\n", i);
852 		return NULL;
853 	}
854 
855 	/* detach_buf_split clears data, so grab it now. */
856 	ret = vq->split.desc_state[i].data;
857 	detach_buf_split(vq, i, ctx);
858 	vq->last_used_idx++;
859 	/* If we expect an interrupt for the next entry, tell host
860 	 * by writing event index and flush out the write before
861 	 * the read in the next get_buf call. */
862 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
863 		virtio_store_mb(vq->weak_barriers,
864 				&vring_used_event(&vq->split.vring),
865 				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
866 
867 	LAST_ADD_TIME_INVALID(vq);
868 
869 	END_USE(vq);
870 	return ret;
871 }
872 
873 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
874 {
875 	struct vring_virtqueue *vq = to_vvq(_vq);
876 
877 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
878 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
879 
880 		/*
881 		 * If device triggered an event already it won't trigger one again:
882 		 * no need to disable.
883 		 */
884 		if (vq->event_triggered)
885 			return;
886 
887 		if (vq->event)
888 			/* TODO: this is a hack. Figure out a cleaner value to write. */
889 			vring_used_event(&vq->split.vring) = 0x0;
890 		else
891 			vq->split.vring.avail->flags =
892 				cpu_to_virtio16(_vq->vdev,
893 						vq->split.avail_flags_shadow);
894 	}
895 }
896 
897 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
898 {
899 	struct vring_virtqueue *vq = to_vvq(_vq);
900 	u16 last_used_idx;
901 
902 	START_USE(vq);
903 
904 	/* We optimistically turn back on interrupts, then check if there was
905 	 * more to do. */
906 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
907 	 * either clear the flags bit or point the event index at the next
908 	 * entry. Always do both to keep code simple. */
909 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
910 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
911 		if (!vq->event)
912 			vq->split.vring.avail->flags =
913 				cpu_to_virtio16(_vq->vdev,
914 						vq->split.avail_flags_shadow);
915 	}
916 	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
917 			last_used_idx = vq->last_used_idx);
918 	END_USE(vq);
919 	return last_used_idx;
920 }
921 
922 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
923 {
924 	struct vring_virtqueue *vq = to_vvq(_vq);
925 
926 	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
927 			vq->split.vring.used->idx);
928 }
929 
930 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
931 {
932 	struct vring_virtqueue *vq = to_vvq(_vq);
933 	u16 bufs;
934 
935 	START_USE(vq);
936 
937 	/* We optimistically turn back on interrupts, then check if there was
938 	 * more to do. */
939 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
940 	 * either clear the flags bit or point the event index at the next
941 	 * entry. Always update the event index to keep code simple. */
942 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
943 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
944 		if (!vq->event)
945 			vq->split.vring.avail->flags =
946 				cpu_to_virtio16(_vq->vdev,
947 						vq->split.avail_flags_shadow);
948 	}
949 	/* TODO: tune this threshold */
950 	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
951 
952 	virtio_store_mb(vq->weak_barriers,
953 			&vring_used_event(&vq->split.vring),
954 			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
955 
956 	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
957 					- vq->last_used_idx) > bufs)) {
958 		END_USE(vq);
959 		return false;
960 	}
961 
962 	END_USE(vq);
963 	return true;
964 }
965 
966 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
967 {
968 	struct vring_virtqueue *vq = to_vvq(_vq);
969 	unsigned int i;
970 	void *buf;
971 
972 	START_USE(vq);
973 
974 	for (i = 0; i < vq->split.vring.num; i++) {
975 		if (!vq->split.desc_state[i].data)
976 			continue;
977 		/* detach_buf_split clears data, so grab it now. */
978 		buf = vq->split.desc_state[i].data;
979 		detach_buf_split(vq, i, NULL);
980 		vq->split.avail_idx_shadow--;
981 		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
982 				vq->split.avail_idx_shadow);
983 		END_USE(vq);
984 		return buf;
985 	}
986 	/* That should have freed everything. */
987 	BUG_ON(vq->vq.num_free != vq->split.vring.num);
988 
989 	END_USE(vq);
990 	return NULL;
991 }
992 
993 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
994 				       struct vring_virtqueue *vq)
995 {
996 	struct virtio_device *vdev;
997 
998 	vdev = vq->vq.vdev;
999 
1000 	vring_split->avail_flags_shadow = 0;
1001 	vring_split->avail_idx_shadow = 0;
1002 
1003 	/* No callback?  Tell other side not to bother us. */
1004 	if (!vq->vq.callback) {
1005 		vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1006 		if (!vq->event)
1007 			vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1008 					vring_split->avail_flags_shadow);
1009 	}
1010 }
1011 
1012 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1013 {
1014 	int num;
1015 
1016 	num = vq->split.vring.num;
1017 
1018 	vq->split.vring.avail->flags = 0;
1019 	vq->split.vring.avail->idx = 0;
1020 
1021 	/* reset avail event */
1022 	vq->split.vring.avail->ring[num] = 0;
1023 
1024 	vq->split.vring.used->flags = 0;
1025 	vq->split.vring.used->idx = 0;
1026 
1027 	/* reset used event */
1028 	*(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1029 
1030 	virtqueue_init(vq, num);
1031 
1032 	virtqueue_vring_init_split(&vq->split, vq);
1033 }
1034 
1035 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1036 					 struct vring_virtqueue_split *vring_split)
1037 {
1038 	vq->split = *vring_split;
1039 
1040 	/* Put everything in free lists. */
1041 	vq->free_head = 0;
1042 }
1043 
1044 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1045 {
1046 	struct vring_desc_state_split *state;
1047 	struct vring_desc_extra *extra;
1048 	u32 num = vring_split->vring.num;
1049 
1050 	state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1051 	if (!state)
1052 		goto err_state;
1053 
1054 	extra = vring_alloc_desc_extra(num);
1055 	if (!extra)
1056 		goto err_extra;
1057 
1058 	memset(state, 0, num * sizeof(struct vring_desc_state_split));
1059 
1060 	vring_split->desc_state = state;
1061 	vring_split->desc_extra = extra;
1062 	return 0;
1063 
1064 err_extra:
1065 	kfree(state);
1066 err_state:
1067 	return -ENOMEM;
1068 }
1069 
1070 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1071 			     struct virtio_device *vdev,
1072 			     union virtio_map map)
1073 {
1074 	vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1075 			 vring_split->vring.desc,
1076 			 vring_split->queue_dma_addr,
1077 			 map);
1078 
1079 	kfree(vring_split->desc_state);
1080 	kfree(vring_split->desc_extra);
1081 }
1082 
1083 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1084 				   struct virtio_device *vdev,
1085 				   u32 num,
1086 				   unsigned int vring_align,
1087 				   bool may_reduce_num,
1088 				   union virtio_map map)
1089 {
1090 	void *queue = NULL;
1091 	dma_addr_t dma_addr;
1092 
1093 	/* We assume num is a power of 2. */
1094 	if (!is_power_of_2(num)) {
1095 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1096 		return -EINVAL;
1097 	}
1098 
1099 	/* TODO: allocate each queue chunk individually */
1100 	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1101 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1102 					  &dma_addr,
1103 					  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1104 					  map);
1105 		if (queue)
1106 			break;
1107 		if (!may_reduce_num)
1108 			return -ENOMEM;
1109 	}
1110 
1111 	if (!num)
1112 		return -ENOMEM;
1113 
1114 	if (!queue) {
1115 		/* Try to get a single page. You are my only hope! */
1116 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1117 					  &dma_addr, GFP_KERNEL | __GFP_ZERO,
1118 					  map);
1119 	}
1120 	if (!queue)
1121 		return -ENOMEM;
1122 
1123 	vring_init(&vring_split->vring, num, queue, vring_align);
1124 
1125 	vring_split->queue_dma_addr = dma_addr;
1126 	vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1127 
1128 	vring_split->vring_align = vring_align;
1129 	vring_split->may_reduce_num = may_reduce_num;
1130 
1131 	return 0;
1132 }
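
/*
 * Editorial note on the loop above (not part of the original source): the
 * split ring lives in a single contiguous allocation, so when a large ring
 * cannot be allocated and may_reduce_num is set, the requested size is
 * halved and retried, e.g. 1024 -> 512 -> 256 -> ...  The caller can read
 * back the ring size actually obtained via virtqueue_get_vring_size().
 */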
1133 
1134 static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
1135 					       struct vring_virtqueue_split *vring_split,
1136 					       struct virtio_device *vdev,
1137 					       bool weak_barriers,
1138 					       bool context,
1139 					       bool (*notify)(struct virtqueue *),
1140 					       void (*callback)(struct virtqueue *),
1141 					       const char *name,
1142 					       union virtio_map map)
1143 {
1144 	struct vring_virtqueue *vq;
1145 	int err;
1146 
1147 	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1148 	if (!vq)
1149 		return NULL;
1150 
1151 	vq->packed_ring = false;
1152 	vq->vq.callback = callback;
1153 	vq->vq.vdev = vdev;
1154 	vq->vq.name = name;
1155 	vq->vq.index = index;
1156 	vq->vq.reset = false;
1157 	vq->we_own_ring = false;
1158 	vq->notify = notify;
1159 	vq->weak_barriers = weak_barriers;
1160 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
1161 	vq->broken = true;
1162 #else
1163 	vq->broken = false;
1164 #endif
1165 	vq->map = map;
1166 	vq->use_map_api = vring_use_map_api(vdev);
1167 
1168 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1169 		!context;
1170 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1171 
1172 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1173 		vq->weak_barriers = false;
1174 
1175 	err = vring_alloc_state_extra_split(vring_split);
1176 	if (err) {
1177 		kfree(vq);
1178 		return NULL;
1179 	}
1180 
1181 	virtqueue_vring_init_split(vring_split, vq);
1182 
1183 	virtqueue_init(vq, vring_split->vring.num);
1184 	virtqueue_vring_attach_split(vq, vring_split);
1185 
1186 	spin_lock(&vdev->vqs_list_lock);
1187 	list_add_tail(&vq->vq.list, &vdev->vqs);
1188 	spin_unlock(&vdev->vqs_list_lock);
1189 	return &vq->vq;
1190 }
1191 
1192 static struct virtqueue *vring_create_virtqueue_split(
1193 	unsigned int index,
1194 	unsigned int num,
1195 	unsigned int vring_align,
1196 	struct virtio_device *vdev,
1197 	bool weak_barriers,
1198 	bool may_reduce_num,
1199 	bool context,
1200 	bool (*notify)(struct virtqueue *),
1201 	void (*callback)(struct virtqueue *),
1202 	const char *name,
1203 	union virtio_map map)
1204 {
1205 	struct vring_virtqueue_split vring_split = {};
1206 	struct virtqueue *vq;
1207 	int err;
1208 
1209 	err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1210 				      may_reduce_num, map);
1211 	if (err)
1212 		return NULL;
1213 
1214 	vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
1215 				   context, notify, callback, name, map);
1216 	if (!vq) {
1217 		vring_free_split(&vring_split, vdev, map);
1218 		return NULL;
1219 	}
1220 
1221 	to_vvq(vq)->we_own_ring = true;
1222 
1223 	return vq;
1224 }
1225 
1226 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1227 {
1228 	struct vring_virtqueue_split vring_split = {};
1229 	struct vring_virtqueue *vq = to_vvq(_vq);
1230 	struct virtio_device *vdev = _vq->vdev;
1231 	int err;
1232 
1233 	err = vring_alloc_queue_split(&vring_split, vdev, num,
1234 				      vq->split.vring_align,
1235 				      vq->split.may_reduce_num,
1236 				      vq->map);
1237 	if (err)
1238 		goto err;
1239 
1240 	err = vring_alloc_state_extra_split(&vring_split);
1241 	if (err)
1242 		goto err_state_extra;
1243 
1244 	vring_free(&vq->vq);
1245 
1246 	virtqueue_vring_init_split(&vring_split, vq);
1247 
1248 	virtqueue_init(vq, vring_split.vring.num);
1249 	virtqueue_vring_attach_split(vq, &vring_split);
1250 
1251 	return 0;
1252 
1253 err_state_extra:
1254 	vring_free_split(&vring_split, vdev, vq->map);
1255 err:
1256 	virtqueue_reinit_split(vq);
1257 	return -ENOMEM;
1258 }
1259 
1260 
1261 /*
1262  * Packed ring specific functions - *_packed().
1263  */
1264 static bool packed_used_wrap_counter(u16 last_used_idx)
1265 {
1266 	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1267 }
1268 
1269 static u16 packed_last_used(u16 last_used_idx)
1270 {
1271 	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1272 }
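
/*
 * Editorial worked example (not part of the original source):
 * VRING_PACKED_EVENT_F_WRAP_CTR is 15, so vq->last_used_idx keeps the used
 * index in bits 14:0 and the used wrap counter in bit 15.  A last used
 * index of 5 with the wrap counter set is stored as 0x8005:
 *
 *	packed_last_used(0x8005)	 == 5
 *	packed_used_wrap_counter(0x8005) == true
 */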
1273 
1274 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1275 				     const struct vring_desc_extra *extra)
1276 {
1277 	u16 flags;
1278 
1279 	flags = extra->flags;
1280 
1281 	if (flags & VRING_DESC_F_INDIRECT) {
1282 		if (!vq->use_map_api)
1283 			return;
1284 	} else if (!vring_need_unmap_buffer(vq, extra))
1285 		return;
1286 
1287 	virtqueue_unmap_page_attrs(&vq->vq,
1288 				   extra->addr, extra->len,
1289 				   (flags & VRING_DESC_F_WRITE) ?
1290 				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
1291 				   0);
1292 }
1293 
1294 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1295 						       gfp_t gfp)
1296 {
1297 	struct vring_desc_extra *extra;
1298 	struct vring_packed_desc *desc;
1299 	int i, size;
1300 
1301 	/*
1302 	 * We require lowmem mappings for the descriptors because
1303 	 * otherwise virt_to_phys will give us bogus addresses in the
1304 	 * virtqueue.
1305 	 */
1306 	gfp &= ~__GFP_HIGHMEM;
1307 
1308 	size = (sizeof(*desc) + sizeof(*extra)) * total_sg;
1309 
1310 	desc = kmalloc(size, gfp);
1311 	if (!desc)
1312 		return NULL;
1313 
1314 	extra = (struct vring_desc_extra *)&desc[total_sg];
1315 
1316 	for (i = 0; i < total_sg; i++)
1317 		extra[i].next = i + 1;
1318 
1319 	return desc;
1320 }
1321 
1322 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1323 					 struct scatterlist *sgs[],
1324 					 unsigned int total_sg,
1325 					 unsigned int out_sgs,
1326 					 unsigned int in_sgs,
1327 					 void *data,
1328 					 bool premapped,
1329 					 gfp_t gfp)
1330 {
1331 	struct vring_desc_extra *extra;
1332 	struct vring_packed_desc *desc;
1333 	struct scatterlist *sg;
1334 	unsigned int i, n, err_idx, len;
1335 	u16 head, id;
1336 	dma_addr_t addr;
1337 
1338 	head = vq->packed.next_avail_idx;
1339 	desc = alloc_indirect_packed(total_sg, gfp);
1340 	if (!desc)
1341 		return -ENOMEM;
1342 
1343 	extra = (struct vring_desc_extra *)&desc[total_sg];
1344 
1345 	if (unlikely(vq->vq.num_free < 1)) {
1346 		pr_debug("Can't add buf len 1 - avail = 0\n");
1347 		kfree(desc);
1348 		END_USE(vq);
1349 		return -ENOSPC;
1350 	}
1351 
1352 	i = 0;
1353 	id = vq->free_head;
1354 	BUG_ON(id == vq->packed.vring.num);
1355 
1356 	for (n = 0; n < out_sgs + in_sgs; n++) {
1357 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1358 			if (vring_map_one_sg(vq, sg, n < out_sgs ?
1359 					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
1360 					     &addr, &len, premapped))
1361 				goto unmap_release;
1362 
1363 			desc[i].flags = cpu_to_le16(n < out_sgs ?
1364 						0 : VRING_DESC_F_WRITE);
1365 			desc[i].addr = cpu_to_le64(addr);
1366 			desc[i].len = cpu_to_le32(len);
1367 
1368 			if (unlikely(vq->use_map_api)) {
1369 				extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
1370 				extra[i].len = len;
1371 				extra[i].flags = n < out_sgs ?  0 : VRING_DESC_F_WRITE;
1372 			}
1373 
1374 			i++;
1375 		}
1376 	}
1377 
1378 	/* Now that the indirect table is filled in, map it. */
1379 	addr = vring_map_single(vq, desc,
1380 			total_sg * sizeof(struct vring_packed_desc),
1381 			DMA_TO_DEVICE);
1382 	if (vring_mapping_error(vq, addr))
1383 		goto unmap_release;
1384 
1385 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1386 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1387 				sizeof(struct vring_packed_desc));
1388 	vq->packed.vring.desc[head].id = cpu_to_le16(id);
1389 
1390 	if (vq->use_map_api) {
1391 		vq->packed.desc_extra[id].addr = addr;
1392 		vq->packed.desc_extra[id].len = total_sg *
1393 				sizeof(struct vring_packed_desc);
1394 		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1395 						  vq->packed.avail_used_flags;
1396 	}
1397 
1398 	/*
1399 	 * A driver MUST NOT make the first descriptor in the list
1400 	 * available before all subsequent descriptors comprising
1401 	 * the list are made available.
1402 	 */
1403 	virtio_wmb(vq->weak_barriers);
1404 	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1405 						vq->packed.avail_used_flags);
1406 
1407 	/* We're using some buffers from the free list. */
1408 	vq->vq.num_free -= 1;
1409 
1410 	/* Update free pointer */
1411 	n = head + 1;
1412 	if (n >= vq->packed.vring.num) {
1413 		n = 0;
1414 		vq->packed.avail_wrap_counter ^= 1;
1415 		vq->packed.avail_used_flags ^=
1416 				1 << VRING_PACKED_DESC_F_AVAIL |
1417 				1 << VRING_PACKED_DESC_F_USED;
1418 	}
1419 	vq->packed.next_avail_idx = n;
1420 	vq->free_head = vq->packed.desc_extra[id].next;
1421 
1422 	/* Store token and indirect buffer state. */
1423 	vq->packed.desc_state[id].num = 1;
1424 	vq->packed.desc_state[id].data = data;
1425 	vq->packed.desc_state[id].indir_desc = desc;
1426 	vq->packed.desc_state[id].last = id;
1427 
1428 	vq->num_added += 1;
1429 
1430 	pr_debug("Added buffer head %i to %p\n", head, vq);
1431 	END_USE(vq);
1432 
1433 	return 0;
1434 
1435 unmap_release:
1436 	err_idx = i;
1437 
1438 	for (i = 0; i < err_idx; i++)
1439 		vring_unmap_extra_packed(vq, &extra[i]);
1440 
1441 	kfree(desc);
1442 
1443 	END_USE(vq);
1444 	return -ENOMEM;
1445 }
1446 
1447 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1448 				       struct scatterlist *sgs[],
1449 				       unsigned int total_sg,
1450 				       unsigned int out_sgs,
1451 				       unsigned int in_sgs,
1452 				       void *data,
1453 				       void *ctx,
1454 				       bool premapped,
1455 				       gfp_t gfp)
1456 {
1457 	struct vring_virtqueue *vq = to_vvq(_vq);
1458 	struct vring_packed_desc *desc;
1459 	struct scatterlist *sg;
1460 	unsigned int i, n, c, descs_used, err_idx, len;
1461 	__le16 head_flags, flags;
1462 	u16 head, id, prev, curr, avail_used_flags;
1463 	int err;
1464 
1465 	START_USE(vq);
1466 
1467 	BUG_ON(data == NULL);
1468 	BUG_ON(ctx && vq->indirect);
1469 
1470 	if (unlikely(vq->broken)) {
1471 		END_USE(vq);
1472 		return -EIO;
1473 	}
1474 
1475 	LAST_ADD_TIME_UPDATE(vq);
1476 
1477 	BUG_ON(total_sg == 0);
1478 
1479 	if (virtqueue_use_indirect(vq, total_sg)) {
1480 		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1481 						    in_sgs, data, premapped, gfp);
1482 		if (err != -ENOMEM) {
1483 			END_USE(vq);
1484 			return err;
1485 		}
1486 
1487 		/* fall back on direct */
1488 	}
1489 
1490 	head = vq->packed.next_avail_idx;
1491 	avail_used_flags = vq->packed.avail_used_flags;
1492 
1493 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1494 
1495 	desc = vq->packed.vring.desc;
1496 	i = head;
1497 	descs_used = total_sg;
1498 
1499 	if (unlikely(vq->vq.num_free < descs_used)) {
1500 		pr_debug("Can't add buf len %i - avail = %i\n",
1501 			 descs_used, vq->vq.num_free);
1502 		END_USE(vq);
1503 		return -ENOSPC;
1504 	}
1505 
1506 	id = vq->free_head;
1507 	BUG_ON(id == vq->packed.vring.num);
1508 
1509 	curr = id;
1510 	c = 0;
1511 	for (n = 0; n < out_sgs + in_sgs; n++) {
1512 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1513 			dma_addr_t addr;
1514 
1515 			if (vring_map_one_sg(vq, sg, n < out_sgs ?
1516 					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
1517 					     &addr, &len, premapped))
1518 				goto unmap_release;
1519 
1520 			flags = cpu_to_le16(vq->packed.avail_used_flags |
1521 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1522 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1523 			if (i == head)
1524 				head_flags = flags;
1525 			else
1526 				desc[i].flags = flags;
1527 
1528 			desc[i].addr = cpu_to_le64(addr);
1529 			desc[i].len = cpu_to_le32(len);
1530 			desc[i].id = cpu_to_le16(id);
1531 
1532 			if (unlikely(vq->use_map_api)) {
1533 				vq->packed.desc_extra[curr].addr = premapped ?
1534 					DMA_MAPPING_ERROR : addr;
1535 				vq->packed.desc_extra[curr].len = len;
1536 				vq->packed.desc_extra[curr].flags =
1537 					le16_to_cpu(flags);
1538 			}
1539 			prev = curr;
1540 			curr = vq->packed.desc_extra[curr].next;
1541 
1542 			if ((unlikely(++i >= vq->packed.vring.num))) {
1543 				i = 0;
1544 				vq->packed.avail_used_flags ^=
1545 					1 << VRING_PACKED_DESC_F_AVAIL |
1546 					1 << VRING_PACKED_DESC_F_USED;
1547 			}
1548 		}
1549 	}
1550 
1551 	if (i <= head)
1552 		vq->packed.avail_wrap_counter ^= 1;
1553 
1554 	/* We're using some buffers from the free list. */
1555 	vq->vq.num_free -= descs_used;
1556 
1557 	/* Update free pointer */
1558 	vq->packed.next_avail_idx = i;
1559 	vq->free_head = curr;
1560 
1561 	/* Store token. */
1562 	vq->packed.desc_state[id].num = descs_used;
1563 	vq->packed.desc_state[id].data = data;
1564 	vq->packed.desc_state[id].indir_desc = ctx;
1565 	vq->packed.desc_state[id].last = prev;
1566 
1567 	/*
1568 	 * A driver MUST NOT make the first descriptor in the list
1569 	 * available before all subsequent descriptors comprising
1570 	 * the list are made available.
1571 	 */
1572 	virtio_wmb(vq->weak_barriers);
1573 	vq->packed.vring.desc[head].flags = head_flags;
1574 	vq->num_added += descs_used;
1575 
1576 	pr_debug("Added buffer head %i to %p\n", head, vq);
1577 	END_USE(vq);
1578 
1579 	return 0;
1580 
1581 unmap_release:
1582 	err_idx = i;
1583 	i = head;
1584 	curr = vq->free_head;
1585 
1586 	vq->packed.avail_used_flags = avail_used_flags;
1587 
1588 	for (n = 0; n < total_sg; n++) {
1589 		if (i == err_idx)
1590 			break;
1591 		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1592 		curr = vq->packed.desc_extra[curr].next;
1593 		i++;
1594 		if (i >= vq->packed.vring.num)
1595 			i = 0;
1596 	}
1597 
1598 	END_USE(vq);
1599 	return -EIO;
1600 }
1601 
1602 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1603 {
1604 	struct vring_virtqueue *vq = to_vvq(_vq);
1605 	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1606 	bool needs_kick;
1607 	union {
1608 		struct {
1609 			__le16 off_wrap;
1610 			__le16 flags;
1611 		};
1612 		u32 u32;
1613 	} snapshot;
1614 
1615 	START_USE(vq);
1616 
1617 	/*
1618 	 * We need to expose the new flags value before checking notification
1619 	 * suppressions.
1620 	 */
1621 	virtio_mb(vq->weak_barriers);
1622 
1623 	old = vq->packed.next_avail_idx - vq->num_added;
1624 	new = vq->packed.next_avail_idx;
1625 	vq->num_added = 0;
1626 
1627 	snapshot.u32 = *(u32 *)vq->packed.vring.device;
1628 	flags = le16_to_cpu(snapshot.flags);
1629 
1630 	LAST_ADD_TIME_CHECK(vq);
1631 	LAST_ADD_TIME_INVALID(vq);
1632 
1633 	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1634 		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1635 		goto out;
1636 	}
1637 
1638 	off_wrap = le16_to_cpu(snapshot.off_wrap);
1639 
1640 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1641 	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1642 	if (wrap_counter != vq->packed.avail_wrap_counter)
1643 		event_idx -= vq->packed.vring.num;
1644 
1645 	needs_kick = vring_need_event(event_idx, new, old);
1646 out:
1647 	END_USE(vq);
1648 	return needs_kick;
1649 }
1650 
1651 static void detach_buf_packed(struct vring_virtqueue *vq,
1652 			      unsigned int id, void **ctx)
1653 {
1654 	struct vring_desc_state_packed *state = NULL;
1655 	struct vring_packed_desc *desc;
1656 	unsigned int i, curr;
1657 
1658 	state = &vq->packed.desc_state[id];
1659 
1660 	/* Clear data ptr. */
1661 	state->data = NULL;
1662 
1663 	vq->packed.desc_extra[state->last].next = vq->free_head;
1664 	vq->free_head = id;
1665 	vq->vq.num_free += state->num;
1666 
1667 	if (unlikely(vq->use_map_api)) {
1668 		curr = id;
1669 		for (i = 0; i < state->num; i++) {
1670 			vring_unmap_extra_packed(vq,
1671 						 &vq->packed.desc_extra[curr]);
1672 			curr = vq->packed.desc_extra[curr].next;
1673 		}
1674 	}
1675 
1676 	if (vq->indirect) {
1677 		struct vring_desc_extra *extra;
1678 		u32 len, num;
1679 
1680 		/* Free the indirect table, if any, now that it's unmapped. */
1681 		desc = state->indir_desc;
1682 		if (!desc)
1683 			return;
1684 
1685 		if (vq->use_map_api) {
1686 			len = vq->packed.desc_extra[id].len;
1687 			num = len / sizeof(struct vring_packed_desc);
1688 
1689 			extra = (struct vring_desc_extra *)&desc[num];
1690 
1691 			for (i = 0; i < num; i++)
1692 				vring_unmap_extra_packed(vq, &extra[i]);
1693 		}
1694 		kfree(desc);
1695 		state->indir_desc = NULL;
1696 	} else if (ctx) {
1697 		*ctx = state->indir_desc;
1698 	}
1699 }
1700 
1701 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1702 				       u16 idx, bool used_wrap_counter)
1703 {
1704 	bool avail, used;
1705 	u16 flags;
1706 
1707 	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1708 	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1709 	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1710 
1711 	return avail == used && used == used_wrap_counter;
1712 }
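
/*
 * Editorial note on the check above (not part of the original source): in
 * the packed ring layout the driver makes a descriptor available by setting
 * AVAIL to its avail wrap counter and USED to the inverse; the device marks
 * it used by flipping USED to match AVAIL.  "avail == used == the wrap
 * counter we are polling for" is therefore exactly the condition for a
 * consumed descriptor, with no other ring field to consult.
 */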
1713 
1714 static bool more_used_packed(const struct vring_virtqueue *vq)
1715 {
1716 	u16 last_used;
1717 	u16 last_used_idx;
1718 	bool used_wrap_counter;
1719 
1720 	last_used_idx = READ_ONCE(vq->last_used_idx);
1721 	last_used = packed_last_used(last_used_idx);
1722 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1723 	return is_used_desc_packed(vq, last_used, used_wrap_counter);
1724 }
1725 
1726 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1727 					  unsigned int *len,
1728 					  void **ctx)
1729 {
1730 	struct vring_virtqueue *vq = to_vvq(_vq);
1731 	u16 last_used, id, last_used_idx;
1732 	bool used_wrap_counter;
1733 	void *ret;
1734 
1735 	START_USE(vq);
1736 
1737 	if (unlikely(vq->broken)) {
1738 		END_USE(vq);
1739 		return NULL;
1740 	}
1741 
1742 	if (!more_used_packed(vq)) {
1743 		pr_debug("No more buffers in queue\n");
1744 		END_USE(vq);
1745 		return NULL;
1746 	}
1747 
1748 	/* Only get used elements after they have been exposed by host. */
1749 	virtio_rmb(vq->weak_barriers);
1750 
1751 	last_used_idx = READ_ONCE(vq->last_used_idx);
1752 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1753 	last_used = packed_last_used(last_used_idx);
1754 	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1755 	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1756 
1757 	if (unlikely(id >= vq->packed.vring.num)) {
1758 		BAD_RING(vq, "id %u out of range\n", id);
1759 		return NULL;
1760 	}
1761 	if (unlikely(!vq->packed.desc_state[id].data)) {
1762 		BAD_RING(vq, "id %u is not a head!\n", id);
1763 		return NULL;
1764 	}
1765 
1766 	/* detach_buf_packed clears data, so grab it now. */
1767 	ret = vq->packed.desc_state[id].data;
1768 	detach_buf_packed(vq, id, ctx);
1769 
1770 	last_used += vq->packed.desc_state[id].num;
1771 	if (unlikely(last_used >= vq->packed.vring.num)) {
1772 		last_used -= vq->packed.vring.num;
1773 		used_wrap_counter ^= 1;
1774 	}
1775 
1776 	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1777 	WRITE_ONCE(vq->last_used_idx, last_used);
1778 
1779 	/*
1780 	 * If we expect an interrupt for the next entry, tell host
1781 	 * by writing event index and flush out the write before
1782 	 * the read in the next get_buf call.
1783 	 */
1784 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1785 		virtio_store_mb(vq->weak_barriers,
1786 				&vq->packed.vring.driver->off_wrap,
1787 				cpu_to_le16(vq->last_used_idx));
1788 
1789 	LAST_ADD_TIME_INVALID(vq);
1790 
1791 	END_USE(vq);
1792 	return ret;
1793 }
1794 
1795 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1796 {
1797 	struct vring_virtqueue *vq = to_vvq(_vq);
1798 
1799 	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1800 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1801 
1802 		/*
1803 		 * If device triggered an event already it won't trigger one again:
1804 		 * no need to disable.
1805 		 */
1806 		if (vq->event_triggered)
1807 			return;
1808 
1809 		vq->packed.vring.driver->flags =
1810 			cpu_to_le16(vq->packed.event_flags_shadow);
1811 	}
1812 }
1813 
1814 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1815 {
1816 	struct vring_virtqueue *vq = to_vvq(_vq);
1817 
1818 	START_USE(vq);
1819 
1820 	/*
1821 	 * We optimistically turn back on interrupts, then check if there was
1822 	 * more to do.
1823 	 */
1824 
1825 	if (vq->event) {
1826 		vq->packed.vring.driver->off_wrap =
1827 			cpu_to_le16(vq->last_used_idx);
1828 		/*
1829 		 * We need to update event offset and event wrap
1830 		 * counter first before updating event flags.
1831 		 */
1832 		virtio_wmb(vq->weak_barriers);
1833 	}
1834 
1835 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1836 		vq->packed.event_flags_shadow = vq->event ?
1837 				VRING_PACKED_EVENT_FLAG_DESC :
1838 				VRING_PACKED_EVENT_FLAG_ENABLE;
1839 		vq->packed.vring.driver->flags =
1840 				cpu_to_le16(vq->packed.event_flags_shadow);
1841 	}
1842 
1843 	END_USE(vq);
1844 	return vq->last_used_idx;
1845 }
1846 
1847 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1848 {
1849 	struct vring_virtqueue *vq = to_vvq(_vq);
1850 	bool wrap_counter;
1851 	u16 used_idx;
1852 
1853 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1854 	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1855 
1856 	return is_used_desc_packed(vq, used_idx, wrap_counter);
1857 }
1858 
1859 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1860 {
1861 	struct vring_virtqueue *vq = to_vvq(_vq);
1862 	u16 used_idx, wrap_counter, last_used_idx;
1863 	u16 bufs;
1864 
1865 	START_USE(vq);
1866 
1867 	/*
1868 	 * We optimistically turn back on interrupts, then check if there was
1869 	 * more to do.
1870 	 */
1871 
1872 	if (vq->event) {
1873 		/* TODO: tune this threshold */
1874 		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1875 		last_used_idx = READ_ONCE(vq->last_used_idx);
1876 		wrap_counter = packed_used_wrap_counter(last_used_idx);
1877 
1878 		used_idx = packed_last_used(last_used_idx) + bufs;
1879 		if (used_idx >= vq->packed.vring.num) {
1880 			used_idx -= vq->packed.vring.num;
1881 			wrap_counter ^= 1;
1882 		}
1883 
1884 		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1885 			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1886 
1887 		/*
1888 		 * We need to update event offset and event wrap
1889 		 * counter first before updating event flags.
1890 		 */
1891 		virtio_wmb(vq->weak_barriers);
1892 	}
1893 
1894 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1895 		vq->packed.event_flags_shadow = vq->event ?
1896 				VRING_PACKED_EVENT_FLAG_DESC :
1897 				VRING_PACKED_EVENT_FLAG_ENABLE;
1898 		vq->packed.vring.driver->flags =
1899 				cpu_to_le16(vq->packed.event_flags_shadow);
1900 	}
1901 
1902 	/*
1903 	 * We need to update event suppression structure first
1904 	 * before re-checking for more used buffers.
1905 	 */
1906 	virtio_mb(vq->weak_barriers);
1907 
1908 	last_used_idx = READ_ONCE(vq->last_used_idx);
1909 	wrap_counter = packed_used_wrap_counter(last_used_idx);
1910 	used_idx = packed_last_used(last_used_idx);
1911 	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1912 		END_USE(vq);
1913 		return false;
1914 	}
1915 
1916 	END_USE(vq);
1917 	return true;
1918 }
1919 
1920 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1921 {
1922 	struct vring_virtqueue *vq = to_vvq(_vq);
1923 	unsigned int i;
1924 	void *buf;
1925 
1926 	START_USE(vq);
1927 
1928 	for (i = 0; i < vq->packed.vring.num; i++) {
1929 		if (!vq->packed.desc_state[i].data)
1930 			continue;
1931 		/* detach_buf clears data, so grab it now. */
1932 		buf = vq->packed.desc_state[i].data;
1933 		detach_buf_packed(vq, i, NULL);
1934 		END_USE(vq);
1935 		return buf;
1936 	}
1937 	/* That should have freed everything. */
1938 	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1939 
1940 	END_USE(vq);
1941 	return NULL;
1942 }
1943 
1944 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1945 {
1946 	struct vring_desc_extra *desc_extra;
1947 	unsigned int i;
1948 
1949 	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1950 				   GFP_KERNEL);
1951 	if (!desc_extra)
1952 		return NULL;
1953 
1954 	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1955 
1956 	for (i = 0; i < num - 1; i++)
1957 		desc_extra[i].next = i + 1;
1958 
1959 	return desc_extra;
1960 }
1961 
1962 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1963 			      struct virtio_device *vdev,
1964 			      union virtio_map map)
1965 {
1966 	if (vring_packed->vring.desc)
1967 		vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1968 				 vring_packed->vring.desc,
1969 				 vring_packed->ring_dma_addr,
1970 				 map);
1971 
1972 	if (vring_packed->vring.driver)
1973 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1974 				 vring_packed->vring.driver,
1975 				 vring_packed->driver_event_dma_addr,
1976 				 map);
1977 
1978 	if (vring_packed->vring.device)
1979 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1980 				 vring_packed->vring.device,
1981 				 vring_packed->device_event_dma_addr,
1982 				 map);
1983 
1984 	kfree(vring_packed->desc_state);
1985 	kfree(vring_packed->desc_extra);
1986 }
1987 
1988 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1989 				    struct virtio_device *vdev,
1990 				    u32 num, union virtio_map map)
1991 {
1992 	struct vring_packed_desc *ring;
1993 	struct vring_packed_desc_event *driver, *device;
1994 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1995 	size_t ring_size_in_bytes, event_size_in_bytes;
1996 
1997 	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1998 
1999 	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
2000 				 &ring_dma_addr,
2001 				 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2002 				 map);
2003 	if (!ring)
2004 		goto err;
2005 
2006 	vring_packed->vring.desc         = ring;
2007 	vring_packed->ring_dma_addr      = ring_dma_addr;
2008 	vring_packed->ring_size_in_bytes = ring_size_in_bytes;
2009 
2010 	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
2011 
2012 	driver = vring_alloc_queue(vdev, event_size_in_bytes,
2013 				   &driver_event_dma_addr,
2014 				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2015 				   map);
2016 	if (!driver)
2017 		goto err;
2018 
2019 	vring_packed->vring.driver          = driver;
2020 	vring_packed->event_size_in_bytes   = event_size_in_bytes;
2021 	vring_packed->driver_event_dma_addr = driver_event_dma_addr;
2022 
2023 	device = vring_alloc_queue(vdev, event_size_in_bytes,
2024 				   &device_event_dma_addr,
2025 				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2026 				   map);
2027 	if (!device)
2028 		goto err;
2029 
2030 	vring_packed->vring.device          = device;
2031 	vring_packed->device_event_dma_addr = device_event_dma_addr;
2032 
2033 	vring_packed->vring.num = num;
2034 
2035 	return 0;
2036 
2037 err:
2038 	vring_free_packed(vring_packed, vdev, map);
2039 	return -ENOMEM;
2040 }
2041 
2042 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
2043 {
2044 	struct vring_desc_state_packed *state;
2045 	struct vring_desc_extra *extra;
2046 	u32 num = vring_packed->vring.num;
2047 
2048 	state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
2049 	if (!state)
2050 		goto err_desc_state;
2051 
2052 	memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2053 
2054 	extra = vring_alloc_desc_extra(num);
2055 	if (!extra)
2056 		goto err_desc_extra;
2057 
2058 	vring_packed->desc_state = state;
2059 	vring_packed->desc_extra = extra;
2060 
2061 	return 0;
2062 
2063 err_desc_extra:
2064 	kfree(state);
2065 err_desc_state:
2066 	return -ENOMEM;
2067 }
2068 
2069 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2070 					bool callback)
2071 {
2072 	vring_packed->next_avail_idx = 0;
2073 	vring_packed->avail_wrap_counter = 1;
2074 	vring_packed->event_flags_shadow = 0;
2075 	vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2076 
2077 	/* No callback?  Tell other side not to bother us. */
2078 	if (!callback) {
2079 		vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2080 		vring_packed->vring.driver->flags =
2081 			cpu_to_le16(vring_packed->event_flags_shadow);
2082 	}
2083 }
2084 
2085 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2086 					  struct vring_virtqueue_packed *vring_packed)
2087 {
2088 	vq->packed = *vring_packed;
2089 
2090 	/* Put everything in free lists. */
2091 	vq->free_head = 0;
2092 }
2093 
2094 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2095 {
2096 	memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2097 	memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2098 
2099 	/* We need to reset the desc.flags. For more, see is_used_desc_packed(). */
2100 	memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2101 
2102 	virtqueue_init(vq, vq->packed.vring.num);
2103 	virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2104 }
2105 
2106 static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
2107 					       struct vring_virtqueue_packed *vring_packed,
2108 					       struct virtio_device *vdev,
2109 					       bool weak_barriers,
2110 					       bool context,
2111 					       bool (*notify)(struct virtqueue *),
2112 					       void (*callback)(struct virtqueue *),
2113 					       const char *name,
2114 					       union virtio_map map)
2115 {
2116 	struct vring_virtqueue *vq;
2117 	int err;
2118 
2119 	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2120 	if (!vq)
2121 		return NULL;
2122 
2123 	vq->vq.callback = callback;
2124 	vq->vq.vdev = vdev;
2125 	vq->vq.name = name;
2126 	vq->vq.index = index;
2127 	vq->vq.reset = false;
2128 	vq->we_own_ring = false;
2129 	vq->notify = notify;
2130 	vq->weak_barriers = weak_barriers;
2131 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2132 	vq->broken = true;
2133 #else
2134 	vq->broken = false;
2135 #endif
2136 	vq->packed_ring = true;
2137 	vq->map = map;
2138 	vq->use_map_api = vring_use_map_api(vdev);
2139 
2140 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2141 		!context;
2142 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2143 
2144 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2145 		vq->weak_barriers = false;
2146 
2147 	err = vring_alloc_state_extra_packed(vring_packed);
2148 	if (err) {
2149 		kfree(vq);
2150 		return NULL;
2151 	}
2152 
2153 	virtqueue_vring_init_packed(vring_packed, !!callback);
2154 
2155 	virtqueue_init(vq, vring_packed->vring.num);
2156 	virtqueue_vring_attach_packed(vq, vring_packed);
2157 
2158 	spin_lock(&vdev->vqs_list_lock);
2159 	list_add_tail(&vq->vq.list, &vdev->vqs);
2160 	spin_unlock(&vdev->vqs_list_lock);
2161 	return &vq->vq;
2162 }
2163 
2164 static struct virtqueue *vring_create_virtqueue_packed(
2165 	unsigned int index,
2166 	unsigned int num,
2167 	unsigned int vring_align,
2168 	struct virtio_device *vdev,
2169 	bool weak_barriers,
2170 	bool may_reduce_num,
2171 	bool context,
2172 	bool (*notify)(struct virtqueue *),
2173 	void (*callback)(struct virtqueue *),
2174 	const char *name,
2175 	union virtio_map map)
2176 {
2177 	struct vring_virtqueue_packed vring_packed = {};
2178 	struct virtqueue *vq;
2179 
2180 	if (vring_alloc_queue_packed(&vring_packed, vdev, num, map))
2181 		return NULL;
2182 
2183 	vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers,
2184 					context, notify, callback, name, map);
2185 	if (!vq) {
2186 		vring_free_packed(&vring_packed, vdev, map);
2187 		return NULL;
2188 	}
2189 
2190 	to_vvq(vq)->we_own_ring = true;
2191 
2192 	return vq;
2193 }
2194 
2195 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2196 {
2197 	struct vring_virtqueue_packed vring_packed = {};
2198 	struct vring_virtqueue *vq = to_vvq(_vq);
2199 	struct virtio_device *vdev = _vq->vdev;
2200 	int err;
2201 
2202 	if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map))
2203 		goto err_ring;
2204 
2205 	err = vring_alloc_state_extra_packed(&vring_packed);
2206 	if (err)
2207 		goto err_state_extra;
2208 
2209 	vring_free(&vq->vq);
2210 
2211 	virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2212 
2213 	virtqueue_init(vq, vring_packed.vring.num);
2214 	virtqueue_vring_attach_packed(vq, &vring_packed);
2215 
2216 	return 0;
2217 
2218 err_state_extra:
2219 	vring_free_packed(&vring_packed, vdev, vq->map);
2220 err_ring:
2221 	virtqueue_reinit_packed(vq);
2222 	return -ENOMEM;
2223 }
2224 
2225 static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2226 					 void (*recycle)(struct virtqueue *vq, void *buf))
2227 {
2228 	struct vring_virtqueue *vq = to_vvq(_vq);
2229 	struct virtio_device *vdev = vq->vq.vdev;
2230 	void *buf;
2231 	int err;
2232 
2233 	if (!vq->we_own_ring)
2234 		return -EPERM;
2235 
2236 	if (!vdev->config->disable_vq_and_reset)
2237 		return -ENOENT;
2238 
2239 	if (!vdev->config->enable_vq_after_reset)
2240 		return -ENOENT;
2241 
2242 	err = vdev->config->disable_vq_and_reset(_vq);
2243 	if (err)
2244 		return err;
2245 
2246 	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2247 		recycle(_vq, buf);
2248 
2249 	return 0;
2250 }
2251 
2252 static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2253 {
2254 	struct vring_virtqueue *vq = to_vvq(_vq);
2255 	struct virtio_device *vdev = vq->vq.vdev;
2256 
2257 	if (vdev->config->enable_vq_after_reset(_vq))
2258 		return -EBUSY;
2259 
2260 	return 0;
2261 }
2262 
2263 /*
2264  * Generic functions and exported symbols.
2265  */
2266 
2267 static inline int virtqueue_add(struct virtqueue *_vq,
2268 				struct scatterlist *sgs[],
2269 				unsigned int total_sg,
2270 				unsigned int out_sgs,
2271 				unsigned int in_sgs,
2272 				void *data,
2273 				void *ctx,
2274 				bool premapped,
2275 				gfp_t gfp)
2276 {
2277 	struct vring_virtqueue *vq = to_vvq(_vq);
2278 
2279 	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2280 					out_sgs, in_sgs, data, ctx, premapped, gfp) :
2281 				 virtqueue_add_split(_vq, sgs, total_sg,
2282 					out_sgs, in_sgs, data, ctx, premapped, gfp);
2283 }
2284 
2285 /**
2286  * virtqueue_add_sgs - expose buffers to other end
2287  * @_vq: the struct virtqueue we're talking about.
2288  * @sgs: array of terminated scatterlists.
2289  * @out_sgs: the number of scatterlists readable by other side
2290  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2291  * @data: the token identifying the buffer.
2292  * @gfp: how to do memory allocations (if necessary).
2293  *
2294  * Caller must ensure we don't call this with other virtqueue operations
2295  * at the same time (except where noted).
2296  *
2297  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2298  *
2299  * NB: ENOSPC is a special code that is only returned on an attempt to add a
2300  * buffer to a full VQ. It indicates that some buffers are outstanding and that
2301  * the operation can be retried after some buffers have been used.
2302  */
2303 int virtqueue_add_sgs(struct virtqueue *_vq,
2304 		      struct scatterlist *sgs[],
2305 		      unsigned int out_sgs,
2306 		      unsigned int in_sgs,
2307 		      void *data,
2308 		      gfp_t gfp)
2309 {
2310 	unsigned int i, total_sg = 0;
2311 
2312 	/* Count them first. */
2313 	for (i = 0; i < out_sgs + in_sgs; i++) {
2314 		struct scatterlist *sg;
2315 
2316 		for (sg = sgs[i]; sg; sg = sg_next(sg))
2317 			total_sg++;
2318 	}
2319 	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2320 			     data, NULL, false, gfp);
2321 }
2322 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
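
/*
 * Example (illustrative sketch, not used by this file): how a driver might
 * queue a request with one device-readable header and one device-writable
 * response area via virtqueue_add_sgs().  The request layout and the names
 * my_req/my_queue_req are hypothetical.
 */
struct my_req {
	u8 hdr[16];	/* read by the device */
	u8 resp[64];	/* written by the device */
};

static int my_queue_req(struct virtqueue *vq, struct my_req *req)
{
	struct scatterlist hdr, resp, *sgs[2];
	int err;

	sg_init_one(&hdr, req->hdr, sizeof(req->hdr));
	sg_init_one(&resp, req->resp, sizeof(req->resp));
	sgs[0] = &hdr;		/* out_sgs = 1: readable by the device */
	sgs[1] = &resp;		/* in_sgs = 1: writable by the device */

	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
	if (err)
		return err;	/* e.g. -ENOSPC when the ring is full */

	virtqueue_kick(vq);
	return 0;
}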
2323 
2324 /**
2325  * virtqueue_add_outbuf - expose output buffers to other end
2326  * @vq: the struct virtqueue we're talking about.
2327  * @sg: scatterlist (must be well-formed and terminated!)
2328  * @num: the number of entries in @sg readable by other side
2329  * @data: the token identifying the buffer.
2330  * @gfp: how to do memory allocations (if necessary).
2331  *
2332  * Caller must ensure we don't call this with other virtqueue operations
2333  * at the same time (except where noted).
2334  *
2335  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2336  */
2337 int virtqueue_add_outbuf(struct virtqueue *vq,
2338 			 struct scatterlist *sg, unsigned int num,
2339 			 void *data,
2340 			 gfp_t gfp)
2341 {
2342 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp);
2343 }
2344 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2345 
2346 /**
2347  * virtqueue_add_outbuf_premapped - expose premapped output buffers to other end
2348  * @vq: the struct virtqueue we're talking about.
2349  * @sg: scatterlist (must be well-formed and terminated!)
2350  * @num: the number of entries in @sg readable by other side
2351  * @data: the token identifying the buffer.
2352  * @gfp: how to do memory allocations (if necessary).
2353  *
2354  * Caller must ensure we don't call this with other virtqueue operations
2355  * at the same time (except where noted).
2356  *
2357  * Return:
2358  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2359  */
2360 int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
2361 				   struct scatterlist *sg, unsigned int num,
2362 				   void *data,
2363 				   gfp_t gfp)
2364 {
2365 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp);
2366 }
2367 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
2368 
2369 /**
2370  * virtqueue_add_inbuf - expose input buffers to other end
2371  * @vq: the struct virtqueue we're talking about.
2372  * @sg: scatterlist (must be well-formed and terminated!)
2373  * @num: the number of entries in @sg writable by other side
2374  * @data: the token identifying the buffer.
2375  * @gfp: how to do memory allocations (if necessary).
2376  *
2377  * Caller must ensure we don't call this with other virtqueue operations
2378  * at the same time (except where noted).
2379  *
2380  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2381  */
2382 int virtqueue_add_inbuf(struct virtqueue *vq,
2383 			struct scatterlist *sg, unsigned int num,
2384 			void *data,
2385 			gfp_t gfp)
2386 {
2387 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp);
2388 }
2389 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
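
/*
 * Example (illustrative sketch, not used by this file): keeping a receive
 * queue topped up with single-entry input buffers, in the style of network
 * drivers.  my_fill_rx and the PAGE_SIZE buffer size are hypothetical.
 */
static void my_fill_rx(struct virtqueue *vq, gfp_t gfp)
{
	struct scatterlist sg;
	void *buf;

	/* Add buffers until allocation fails or the ring is full (-ENOSPC). */
	for (;;) {
		buf = kmalloc(PAGE_SIZE, gfp);
		if (!buf)
			break;

		sg_init_one(&sg, buf, PAGE_SIZE);
		if (virtqueue_add_inbuf(vq, &sg, 1, buf, gfp)) {
			kfree(buf);
			break;
		}
	}

	virtqueue_kick(vq);
}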
2390 
2391 /**
2392  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2393  * @vq: the struct virtqueue we're talking about.
2394  * @sg: scatterlist (must be well-formed and terminated!)
2395  * @num: the number of entries in @sg writable by other side
2396  * @data: the token identifying the buffer.
2397  * @ctx: extra context for the token
2398  * @gfp: how to do memory allocations (if necessary).
2399  *
2400  * Caller must ensure we don't call this with other virtqueue operations
2401  * at the same time (except where noted).
2402  *
2403  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2404  */
2405 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2406 			struct scatterlist *sg, unsigned int num,
2407 			void *data,
2408 			void *ctx,
2409 			gfp_t gfp)
2410 {
2411 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp);
2412 }
2413 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2414 
2415 /**
2416  * virtqueue_add_inbuf_premapped - expose premapped input buffers to other end
2417  * @vq: the struct virtqueue we're talking about.
2418  * @sg: scatterlist (must be well-formed and terminated!)
2419  * @num: the number of entries in @sg writable by other side
2420  * @data: the token identifying the buffer.
2421  * @ctx: extra context for the token
2422  * @gfp: how to do memory allocations (if necessary).
2423  *
2424  * Caller must ensure we don't call this with other virtqueue operations
2425  * at the same time (except where noted).
2426  *
2427  * Return:
2428  * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
2429  */
2430 int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
2431 				  struct scatterlist *sg, unsigned int num,
2432 				  void *data,
2433 				  void *ctx,
2434 				  gfp_t gfp)
2435 {
2436 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp);
2437 }
2438 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
2439 
2440 /**
2441  * virtqueue_dma_dev - get the dma dev
2442  * @_vq: the struct virtqueue we're talking about.
2443  *
2444  * Returns the dma dev. That can be used for the dma api.
2445  */
2446 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2447 {
2448 	struct vring_virtqueue *vq = to_vvq(_vq);
2449 
2450 	if (vq->use_map_api && !_vq->vdev->map)
2451 		return vq->map.dma_dev;
2452 	else
2453 		return NULL;
2454 }
2455 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
2456 
2457 /**
2458  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2459  * @_vq: the struct virtqueue
2460  *
2461  * Instead of virtqueue_kick(), you can do:
2462  *	if (virtqueue_kick_prepare(vq))
2463  *		virtqueue_notify(vq);
2464  *
2465  * This is sometimes useful because virtqueue_kick_prepare() needs
2466  * to be serialized, but the actual virtqueue_notify() call does not.
2467  */
2468 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2469 {
2470 	struct vring_virtqueue *vq = to_vvq(_vq);
2471 
2472 	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2473 				 virtqueue_kick_prepare_split(_vq);
2474 }
2475 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2476 
2477 /**
2478  * virtqueue_notify - second half of split virtqueue_kick call.
2479  * @_vq: the struct virtqueue
2480  *
2481  * This does not need to be serialized.
2482  *
2483  * Returns false if host notify failed or queue is broken, otherwise true.
2484  */
2485 bool virtqueue_notify(struct virtqueue *_vq)
2486 {
2487 	struct vring_virtqueue *vq = to_vvq(_vq);
2488 
2489 	if (unlikely(vq->broken))
2490 		return false;
2491 
2492 	/* Prod other side to tell it about changes. */
2493 	if (!vq->notify(_vq)) {
2494 		vq->broken = true;
2495 		return false;
2496 	}
2497 	return true;
2498 }
2499 EXPORT_SYMBOL_GPL(virtqueue_notify);
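
/*
 * Example (illustrative sketch, not used by this file): splitting the kick so
 * the potentially expensive notification happens outside the driver's queue
 * lock.  The lock itself is hypothetical.
 */
static void my_kick_outside_lock(struct virtqueue *vq, spinlock_t *lock)
{
	unsigned long flags;
	bool notify;

	spin_lock_irqsave(lock, flags);
	/* ... virtqueue_add_*() calls go here, under the lock ... */
	notify = virtqueue_kick_prepare(vq);
	spin_unlock_irqrestore(lock, flags);

	if (notify)
		virtqueue_notify(vq);
}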
2500 
2501 /**
2502  * virtqueue_kick - update after add_buf
2503  * @vq: the struct virtqueue
2504  *
2505  * After one or more virtqueue_add_* calls, invoke this to kick
2506  * the other side.
2507  *
2508  * Caller must ensure we don't call this with other virtqueue
2509  * operations at the same time (except where noted).
2510  *
2511  * Returns false if kick failed, otherwise true.
2512  */
2513 bool virtqueue_kick(struct virtqueue *vq)
2514 {
2515 	if (virtqueue_kick_prepare(vq))
2516 		return virtqueue_notify(vq);
2517 	return true;
2518 }
2519 EXPORT_SYMBOL_GPL(virtqueue_kick);
2520 
2521 /**
2522  * virtqueue_get_buf_ctx - get the next used buffer
2523  * @_vq: the struct virtqueue we're talking about.
2524  * @len: the length written into the buffer
2525  * @ctx: extra context for the token
2526  *
2527  * If the device wrote data into the buffer, @len will be set to the
2528  * amount written.  This means you don't need to clear the buffer
2529  * beforehand to ensure there's no data leakage in the case of short
2530  * writes.
2531  *
2532  * Caller must ensure we don't call this with other virtqueue
2533  * operations at the same time (except where noted).
2534  *
2535  * Returns NULL if there are no used buffers, or the "data" token
2536  * handed to virtqueue_add_*().
2537  */
2538 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2539 			    void **ctx)
2540 {
2541 	struct vring_virtqueue *vq = to_vvq(_vq);
2542 
2543 	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2544 				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
2545 }
2546 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2547 
2548 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2549 {
2550 	return virtqueue_get_buf_ctx(_vq, len, NULL);
2551 }
2552 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
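
/*
 * Example (illustrative sketch, not used by this file): a virtqueue callback
 * draining all used buffers.  Real drivers complete their own request
 * structures here instead of just logging the token.
 */
static void my_vq_callback(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
		pr_debug("request %p done, device wrote %u bytes\n", buf, len);
}
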
2553 /**
2554  * virtqueue_disable_cb - disable callbacks
2555  * @_vq: the struct virtqueue we're talking about.
2556  *
2557  * Note that this is not necessarily synchronous, hence unreliable and only
2558  * useful as an optimization.
2559  *
2560  * Unlike other operations, this need not be serialized.
2561  */
2562 void virtqueue_disable_cb(struct virtqueue *_vq)
2563 {
2564 	struct vring_virtqueue *vq = to_vvq(_vq);
2565 
2566 	if (vq->packed_ring)
2567 		virtqueue_disable_cb_packed(_vq);
2568 	else
2569 		virtqueue_disable_cb_split(_vq);
2570 }
2571 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2572 
2573 /**
2574  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2575  * @_vq: the struct virtqueue we're talking about.
2576  *
2577  * This re-enables callbacks; it returns the current queue state
2578  * in an opaque unsigned value. This value should later be tested by
2579  * virtqueue_poll(), to detect a possible race between the driver checking for
2580  * more work, and enabling callbacks.
2581  *
2582  * Caller must ensure we don't call this with other virtqueue
2583  * operations at the same time (except where noted).
2584  */
2585 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2586 {
2587 	struct vring_virtqueue *vq = to_vvq(_vq);
2588 
2589 	if (vq->event_triggered)
2590 		vq->event_triggered = false;
2591 
2592 	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2593 				 virtqueue_enable_cb_prepare_split(_vq);
2594 }
2595 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2596 
2597 /**
2598  * virtqueue_poll - query pending used buffers
2599  * @_vq: the struct virtqueue we're talking about.
2600  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2601  *
2602  * Returns "true" if there are pending used buffers in the queue.
2603  *
2604  * This does not need to be serialized.
2605  */
2606 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2607 {
2608 	struct vring_virtqueue *vq = to_vvq(_vq);
2609 
2610 	if (unlikely(vq->broken))
2611 		return false;
2612 
2613 	virtio_mb(vq->weak_barriers);
2614 	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2615 				 virtqueue_poll_split(_vq, last_used_idx);
2616 }
2617 EXPORT_SYMBOL_GPL(virtqueue_poll);
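
/*
 * Example (illustrative sketch, not used by this file): the prepare/poll pair
 * as NAPI-style drivers use it - re-enable callbacks, then re-check for
 * buffers that raced in while callbacks were still off.
 */
static bool my_try_enable_callbacks(struct virtqueue *vq)
{
	unsigned int opaque = virtqueue_enable_cb_prepare(vq);

	if (unlikely(virtqueue_poll(vq, opaque))) {
		/* More work arrived; keep callbacks off and keep polling. */
		virtqueue_disable_cb(vq);
		return false;
	}

	return true;	/* callbacks are enabled again */
}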
2618 
2619 /**
2620  * virtqueue_enable_cb - restart callbacks after disable_cb.
2621  * @_vq: the struct virtqueue we're talking about.
2622  *
2623  * This re-enables callbacks; it returns "false" if there are pending
2624  * buffers in the queue, to detect a possible race between the driver
2625  * checking for more work, and enabling callbacks.
2626  *
2627  * Caller must ensure we don't call this with other virtqueue
2628  * operations at the same time (except where noted).
2629  */
2630 bool virtqueue_enable_cb(struct virtqueue *_vq)
2631 {
2632 	unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2633 
2634 	return !virtqueue_poll(_vq, last_used_idx);
2635 }
2636 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
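
/*
 * Example (illustrative sketch, not used by this file): the classic loop for
 * draining a queue without missing a callback - process everything with
 * callbacks disabled, then retry if virtqueue_enable_cb() reports that new
 * buffers slipped in.
 */
static void my_drain(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	do {
		virtqueue_disable_cb(vq);
		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
			pr_debug("completed %p (%u bytes)\n", buf, len);
	} while (!virtqueue_enable_cb(vq));
}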
2637 
2638 /**
2639  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2640  * @_vq: the struct virtqueue we're talking about.
2641  *
2642  * This re-enables callbacks but hints to the other side to delay
2643  * interrupts until most of the available buffers have been processed;
2644  * it returns "false" if there are many pending buffers in the queue,
2645  * to detect a possible race between the driver checking for more work,
2646  * and enabling callbacks.
2647  *
2648  * Caller must ensure we don't call this with other virtqueue
2649  * operations at the same time (except where noted).
2650  */
2651 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2652 {
2653 	struct vring_virtqueue *vq = to_vvq(_vq);
2654 
2655 	if (vq->event_triggered)
2656 		data_race(vq->event_triggered = false);
2657 
2658 	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2659 				 virtqueue_enable_cb_delayed_split(_vq);
2660 }
2661 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2662 
2663 /**
2664  * virtqueue_detach_unused_buf - detach first unused buffer
2665  * @_vq: the struct virtqueue we're talking about.
2666  *
2667  * Returns NULL or the "data" token handed to virtqueue_add_*().
2668  * This is not valid on an active queue; it is useful for device
2669  * shutdown or when resetting the queue.
2670  */
2671 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2672 {
2673 	struct vring_virtqueue *vq = to_vvq(_vq);
2674 
2675 	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2676 				 virtqueue_detach_unused_buf_split(_vq);
2677 }
2678 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
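
/*
 * Example (illustrative sketch, not used by this file): releasing buffers
 * that were never used, after the device has been reset so the queue is no
 * longer active.  Here the tokens are assumed to be kmalloc'ed buffers.
 */
static void my_free_unused_bufs(struct virtqueue *vq)
{
	void *buf;

	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(buf);
}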
2679 
2680 static inline bool more_used(const struct vring_virtqueue *vq)
2681 {
2682 	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2683 }
2684 
2685 /**
2686  * vring_interrupt - notify a virtqueue on an interrupt
2687  * @irq: the IRQ number (ignored)
2688  * @_vq: the struct virtqueue to notify
2689  *
2690  * Calls the callback function of @_vq to process the virtqueue
2691  * notification.
2692  */
2693 irqreturn_t vring_interrupt(int irq, void *_vq)
2694 {
2695 	struct vring_virtqueue *vq = to_vvq(_vq);
2696 
2697 	if (!more_used(vq)) {
2698 		pr_debug("virtqueue interrupt with no work for %p\n", vq);
2699 		return IRQ_NONE;
2700 	}
2701 
2702 	if (unlikely(vq->broken)) {
2703 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2704 		dev_warn_once(&vq->vq.vdev->dev,
2705 			      "virtio vring IRQ raised before DRIVER_OK");
2706 		return IRQ_NONE;
2707 #else
2708 		return IRQ_HANDLED;
2709 #endif
2710 	}
2711 
2712 	/* Just a hint for performance: so it's ok that this can be racy! */
2713 	if (vq->event)
2714 		data_race(vq->event_triggered = true);
2715 
2716 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2717 	if (vq->vq.callback)
2718 		vq->vq.callback(&vq->vq);
2719 
2720 	return IRQ_HANDLED;
2721 }
2722 EXPORT_SYMBOL_GPL(vring_interrupt);
2723 
2724 struct virtqueue *vring_create_virtqueue(
2725 	unsigned int index,
2726 	unsigned int num,
2727 	unsigned int vring_align,
2728 	struct virtio_device *vdev,
2729 	bool weak_barriers,
2730 	bool may_reduce_num,
2731 	bool context,
2732 	bool (*notify)(struct virtqueue *),
2733 	void (*callback)(struct virtqueue *),
2734 	const char *name)
2735 {
2736 	union virtio_map map = {.dma_dev = vdev->dev.parent};
2737 
2738 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2739 		return vring_create_virtqueue_packed(index, num, vring_align,
2740 				vdev, weak_barriers, may_reduce_num,
2741 				context, notify, callback, name, map);
2742 
2743 	return vring_create_virtqueue_split(index, num, vring_align,
2744 			vdev, weak_barriers, may_reduce_num,
2745 			context, notify, callback, name, map);
2746 }
2747 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2748 
2749 struct virtqueue *vring_create_virtqueue_map(
2750 	unsigned int index,
2751 	unsigned int num,
2752 	unsigned int vring_align,
2753 	struct virtio_device *vdev,
2754 	bool weak_barriers,
2755 	bool may_reduce_num,
2756 	bool context,
2757 	bool (*notify)(struct virtqueue *),
2758 	void (*callback)(struct virtqueue *),
2759 	const char *name,
2760 	union virtio_map map)
2761 {
2763 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2764 		return vring_create_virtqueue_packed(index, num, vring_align,
2765 				vdev, weak_barriers, may_reduce_num,
2766 				context, notify, callback, name, map);
2767 
2768 	return vring_create_virtqueue_split(index, num, vring_align,
2769 			vdev, weak_barriers, may_reduce_num,
2770 			context, notify, callback, name, map);
2771 }
2772 EXPORT_SYMBOL_GPL(vring_create_virtqueue_map);
2773 
2774 /**
2775  * virtqueue_resize - resize the vring of vq
2776  * @_vq: the struct virtqueue we're talking about.
2777  * @num: new ring num
2778  * @recycle: callback to recycle unused buffers
2779  * @recycle_done: callback to be invoked when recycle for all unused buffers done
2780  *
2781  * When it is really necessary to create a new vring, it will set the current vq
2782  * into the reset state. Then it calls the passed callback to recycle the
2783  * buffers that are no longer used. Only after the new vring is successfully
2784  * created will the old vring be released.
2785  *
2786  * Caller must ensure we don't call this with other virtqueue operations
2787  * at the same time (except where noted).
2788  *
2789  * Returns zero or a negative error.
2790  * 0: success.
2791  * -ENOMEM: Failed to allocate a new ring; fall back to the original ring size.
2792  *  The vq can still work normally.
2793  * -EBUSY: Failed to sync with device, vq may not work properly
2794  * -ENOENT: Transport or device not supported
2795  * -E2BIG/-EINVAL: num error
2796  * -EPERM: Operation not permitted
2797  *
2798  */
2799 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2800 		     void (*recycle)(struct virtqueue *vq, void *buf),
2801 		     void (*recycle_done)(struct virtqueue *vq))
2802 {
2803 	struct vring_virtqueue *vq = to_vvq(_vq);
2804 	int err, err_reset;
2805 
2806 	if (num > vq->vq.num_max)
2807 		return -E2BIG;
2808 
2809 	if (!num)
2810 		return -EINVAL;
2811 
2812 	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2813 		return 0;
2814 
2815 	err = virtqueue_disable_and_recycle(_vq, recycle);
2816 	if (err)
2817 		return err;
2818 	if (recycle_done)
2819 		recycle_done(_vq);
2820 
2821 	if (vq->packed_ring)
2822 		err = virtqueue_resize_packed(_vq, num);
2823 	else
2824 		err = virtqueue_resize_split(_vq, num);
2825 
2826 	err_reset = virtqueue_enable_after_reset(_vq);
2827 	if (err_reset)
2828 		return err_reset;
2829 
2830 	return err;
2831 }
2832 EXPORT_SYMBOL_GPL(virtqueue_resize);
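
/*
 * Example (illustrative sketch, not used by this file): resizing a ring while
 * recycling whatever was still queued.  Here the tokens are assumed to be
 * kmalloc'ed buffers; a real driver would usually return them to its own pool
 * and refill the ring afterwards.
 */
static void my_recycle(struct virtqueue *vq, void *buf)
{
	kfree(buf);
}

static int my_set_ring_size(struct virtqueue *vq, u32 new_num)
{
	return virtqueue_resize(vq, new_num, my_recycle, NULL);
}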
2833 
2834 /**
2835  * virtqueue_reset - detach and recycle all unused buffers
2836  * @_vq: the struct virtqueue we're talking about.
2837  * @recycle: callback to recycle unused buffers
2838  * @recycle_done: callback to be invoked when recycle for all unused buffers done
2839  *
2840  * Caller must ensure we don't call this with other virtqueue operations
2841  * at the same time (except where noted).
2842  *
2843  * Returns zero or a negative error.
2844  * 0: success.
2845  * -EBUSY: Failed to sync with device, vq may not work properly
2846  * -ENOENT: Transport or device not supported
2847  * -EPERM: Operation not permitted
2848  */
2849 int virtqueue_reset(struct virtqueue *_vq,
2850 		    void (*recycle)(struct virtqueue *vq, void *buf),
2851 		    void (*recycle_done)(struct virtqueue *vq))
2852 {
2853 	struct vring_virtqueue *vq = to_vvq(_vq);
2854 	int err;
2855 
2856 	err = virtqueue_disable_and_recycle(_vq, recycle);
2857 	if (err)
2858 		return err;
2859 	if (recycle_done)
2860 		recycle_done(_vq);
2861 
2862 	if (vq->packed_ring)
2863 		virtqueue_reinit_packed(vq);
2864 	else
2865 		virtqueue_reinit_split(vq);
2866 
2867 	return virtqueue_enable_after_reset(_vq);
2868 }
2869 EXPORT_SYMBOL_GPL(virtqueue_reset);
2870 
2871 struct virtqueue *vring_new_virtqueue(unsigned int index,
2872 				      unsigned int num,
2873 				      unsigned int vring_align,
2874 				      struct virtio_device *vdev,
2875 				      bool weak_barriers,
2876 				      bool context,
2877 				      void *pages,
2878 				      bool (*notify)(struct virtqueue *vq),
2879 				      void (*callback)(struct virtqueue *vq),
2880 				      const char *name)
2881 {
2882 	struct vring_virtqueue_split vring_split = {};
2883 	union virtio_map map = {.dma_dev = vdev->dev.parent};
2884 
2885 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2886 		struct vring_virtqueue_packed vring_packed = {};
2887 
2888 		vring_packed.vring.num = num;
2889 		vring_packed.vring.desc = pages;
2890 		return __vring_new_virtqueue_packed(index, &vring_packed,
2891 						    vdev, weak_barriers,
2892 						    context, notify, callback,
2893 						    name, map);
2894 	}
2895 
2896 	vring_init(&vring_split.vring, num, pages, vring_align);
2897 	return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
2898 				     context, notify, callback, name,
2899 				     map);
2900 }
2901 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2902 
2903 static void vring_free(struct virtqueue *_vq)
2904 {
2905 	struct vring_virtqueue *vq = to_vvq(_vq);
2906 
2907 	if (vq->we_own_ring) {
2908 		if (vq->packed_ring) {
2909 			vring_free_queue(vq->vq.vdev,
2910 					 vq->packed.ring_size_in_bytes,
2911 					 vq->packed.vring.desc,
2912 					 vq->packed.ring_dma_addr,
2913 					 vq->map);
2914 
2915 			vring_free_queue(vq->vq.vdev,
2916 					 vq->packed.event_size_in_bytes,
2917 					 vq->packed.vring.driver,
2918 					 vq->packed.driver_event_dma_addr,
2919 					 vq->map);
2920 
2921 			vring_free_queue(vq->vq.vdev,
2922 					 vq->packed.event_size_in_bytes,
2923 					 vq->packed.vring.device,
2924 					 vq->packed.device_event_dma_addr,
2925 					 vq->map);
2926 
2927 			kfree(vq->packed.desc_state);
2928 			kfree(vq->packed.desc_extra);
2929 		} else {
2930 			vring_free_queue(vq->vq.vdev,
2931 					 vq->split.queue_size_in_bytes,
2932 					 vq->split.vring.desc,
2933 					 vq->split.queue_dma_addr,
2934 					 vq->map);
2935 		}
2936 	}
2937 	if (!vq->packed_ring) {
2938 		kfree(vq->split.desc_state);
2939 		kfree(vq->split.desc_extra);
2940 	}
2941 }
2942 
2943 void vring_del_virtqueue(struct virtqueue *_vq)
2944 {
2945 	struct vring_virtqueue *vq = to_vvq(_vq);
2946 
2947 	spin_lock(&vq->vq.vdev->vqs_list_lock);
2948 	list_del(&_vq->list);
2949 	spin_unlock(&vq->vq.vdev->vqs_list_lock);
2950 
2951 	vring_free(_vq);
2952 
2953 	kfree(vq);
2954 }
2955 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2956 
2957 u32 vring_notification_data(struct virtqueue *_vq)
2958 {
2959 	struct vring_virtqueue *vq = to_vvq(_vq);
2960 	u16 next;
2961 
2962 	if (vq->packed_ring)
2963 		next = (vq->packed.next_avail_idx &
2964 				~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
2965 			vq->packed.avail_wrap_counter <<
2966 				VRING_PACKED_EVENT_F_WRAP_CTR;
2967 	else
2968 		next = vq->split.avail_idx_shadow;
2969 
2970 	return next << 16 | _vq->index;
2971 }
2972 EXPORT_SYMBOL_GPL(vring_notification_data);
2973 
2974 /* Manipulates transport-specific feature bits. */
2975 void vring_transport_features(struct virtio_device *vdev)
2976 {
2977 	unsigned int i;
2978 
2979 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2980 		switch (i) {
2981 		case VIRTIO_RING_F_INDIRECT_DESC:
2982 			break;
2983 		case VIRTIO_RING_F_EVENT_IDX:
2984 			break;
2985 		case VIRTIO_F_VERSION_1:
2986 			break;
2987 		case VIRTIO_F_ACCESS_PLATFORM:
2988 			break;
2989 		case VIRTIO_F_RING_PACKED:
2990 			break;
2991 		case VIRTIO_F_ORDER_PLATFORM:
2992 			break;
2993 		case VIRTIO_F_NOTIFICATION_DATA:
2994 			break;
2995 		default:
2996 			/* We don't understand this bit. */
2997 			__virtio_clear_bit(vdev, i);
2998 		}
2999 	}
3000 }
3001 EXPORT_SYMBOL_GPL(vring_transport_features);
3002 
3003 /**
3004  * virtqueue_get_vring_size - return the size of the virtqueue's vring
3005  * @_vq: the struct virtqueue containing the vring of interest.
3006  *
3007  * Returns the size of the vring.  This is mainly used for boasting to
3008  * userspace.  Unlike other operations, this need not be serialized.
3009  */
3010 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
3011 {
3013 	const struct vring_virtqueue *vq = to_vvq(_vq);
3014 
3015 	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
3016 }
3017 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
3018 
3019 /*
3020  * This function should only be called by the core, not directly by the driver.
3021  */
3022 void __virtqueue_break(struct virtqueue *_vq)
3023 {
3024 	struct vring_virtqueue *vq = to_vvq(_vq);
3025 
3026 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3027 	WRITE_ONCE(vq->broken, true);
3028 }
3029 EXPORT_SYMBOL_GPL(__virtqueue_break);
3030 
3031 /*
3032  * This function should only be called by the core, not directly by the driver.
3033  */
3034 void __virtqueue_unbreak(struct virtqueue *_vq)
3035 {
3036 	struct vring_virtqueue *vq = to_vvq(_vq);
3037 
3038 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3039 	WRITE_ONCE(vq->broken, false);
3040 }
3041 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3042 
3043 bool virtqueue_is_broken(const struct virtqueue *_vq)
3044 {
3045 	const struct vring_virtqueue *vq = to_vvq(_vq);
3046 
3047 	return READ_ONCE(vq->broken);
3048 }
3049 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3050 
3051 /*
3052  * This should prevent the device from being used, allowing drivers to
3053  * recover.  You may need to grab appropriate locks to flush.
3054  */
3055 void virtio_break_device(struct virtio_device *dev)
3056 {
3057 	struct virtqueue *_vq;
3058 
3059 	spin_lock(&dev->vqs_list_lock);
3060 	list_for_each_entry(_vq, &dev->vqs, list) {
3061 		struct vring_virtqueue *vq = to_vvq(_vq);
3062 
3063 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3064 		WRITE_ONCE(vq->broken, true);
3065 	}
3066 	spin_unlock(&dev->vqs_list_lock);
3067 }
3068 EXPORT_SYMBOL_GPL(virtio_break_device);
3069 
3070 /*
3071  * This should allow the device to be used by the driver. You may
3072  * need to grab appropriate locks to flush the write to
3073  * vq->broken. This should only be used in some specific cases, e.g.
3074  * probing and restoring. This function should only be called by the
3075  * core, not directly by the driver.
3076  */
3077 void __virtio_unbreak_device(struct virtio_device *dev)
3078 {
3079 	struct virtqueue *_vq;
3080 
3081 	spin_lock(&dev->vqs_list_lock);
3082 	list_for_each_entry(_vq, &dev->vqs, list) {
3083 		struct vring_virtqueue *vq = to_vvq(_vq);
3084 
3085 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3086 		WRITE_ONCE(vq->broken, false);
3087 	}
3088 	spin_unlock(&dev->vqs_list_lock);
3089 }
3090 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3091 
3092 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3093 {
3094 	const struct vring_virtqueue *vq = to_vvq(_vq);
3095 
3096 	BUG_ON(!vq->we_own_ring);
3097 
3098 	if (vq->packed_ring)
3099 		return vq->packed.ring_dma_addr;
3100 
3101 	return vq->split.queue_dma_addr;
3102 }
3103 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3104 
3105 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3106 {
3107 	const struct vring_virtqueue *vq = to_vvq(_vq);
3108 
3109 	BUG_ON(!vq->we_own_ring);
3110 
3111 	if (vq->packed_ring)
3112 		return vq->packed.driver_event_dma_addr;
3113 
3114 	return vq->split.queue_dma_addr +
3115 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3116 }
3117 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3118 
3119 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3120 {
3121 	const struct vring_virtqueue *vq = to_vvq(_vq);
3122 
3123 	BUG_ON(!vq->we_own_ring);
3124 
3125 	if (vq->packed_ring)
3126 		return vq->packed.device_event_dma_addr;
3127 
3128 	return vq->split.queue_dma_addr +
3129 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3130 }
3131 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
3132 
3133 /* Only available for split ring */
3134 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3135 {
3136 	return &to_vvq(vq)->split.vring;
3137 }
3138 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3139 
3140 /**
3141  * virtqueue_map_alloc_coherent - alloc coherent mapping
3142  * @vdev: the virtio device we are talking to
3143  * @map: metadata for performing mapping
3144  * @size: the size of the buffer
3145  * @map_handle: the pointer to the mapped address
3146  * @gfp: allocation flag (GFP_XXX)
3147  *
3148  * return virtual address or NULL on error
3149  * Returns the virtual address or NULL on error.
3150 void *virtqueue_map_alloc_coherent(struct virtio_device *vdev,
3151 				   union virtio_map map,
3152 				   size_t size, dma_addr_t *map_handle,
3153 				   gfp_t gfp)
3154 {
3155 	if (vdev->map)
3156 		return vdev->map->alloc(map, size,
3157 					map_handle, gfp);
3158 	else
3159 		return dma_alloc_coherent(map.dma_dev, size,
3160 					  map_handle, gfp);
3161 }
3162 EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent);
3163 
3164 /**
3165  * virtqueue_map_free_coherent - free coherent mapping
3166  * @vdev: the virtio device we are talking to
3167  * @map: metadata for performing mapping
3168  * @size: the size of the buffer
3169  * @vaddr: the virtual address of the buffer to free
3170  * @map_handle: the mapped address that needs to be freed
3171  */
3172 void virtqueue_map_free_coherent(struct virtio_device *vdev,
3173 				 union virtio_map map, size_t size, void *vaddr,
3174 				 dma_addr_t map_handle)
3175 {
3176 	if (vdev->map)
3177 		vdev->map->free(map, size, vaddr,
3178 				map_handle, 0);
3179 	else
3180 		dma_free_coherent(map.dma_dev, size, vaddr, map_handle);
3181 }
3182 EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent);
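
/*
 * Example (illustrative sketch, not used by this file): allocating and
 * freeing a coherent region through these helpers.  The union virtio_map is
 * built from the parent device here, the way the DMA-based transports in
 * this file do; transports with a custom ->map would pass their own metadata.
 */
static void *my_alloc_area(struct virtio_device *vdev, size_t size,
			   dma_addr_t *handle)
{
	union virtio_map map = { .dma_dev = vdev->dev.parent };

	return virtqueue_map_alloc_coherent(vdev, map, size, handle, GFP_KERNEL);
}

static void my_free_area(struct virtio_device *vdev, size_t size, void *vaddr,
			 dma_addr_t handle)
{
	union virtio_map map = { .dma_dev = vdev->dev.parent };

	virtqueue_map_free_coherent(vdev, map, size, vaddr, handle);
}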
3183 
3184 /**
3185  * virtqueue_map_page_attrs - map a page to the device
3186  * @_vq: the virtqueue we are talking to
3187  * @page: the page that will be mapped by the device
3188  * @offset: the offset in the page for a buffer
3189  * @size: the buffer size
3190  * @dir: mapping direction
3191  * @attrs: mapping attributes
3192  *
3193  * Returns the mapped address. The caller should check it with virtqueue_map_mapping_error().
3194  */
3195 dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq,
3196 				    struct page *page,
3197 				    unsigned long offset,
3198 				    size_t size,
3199 				    enum dma_data_direction dir,
3200 				    unsigned long attrs)
3201 {
3202 	const struct vring_virtqueue *vq = to_vvq(_vq);
3203 	struct virtio_device *vdev = _vq->vdev;
3204 
3205 	if (vdev->map)
3206 		return vdev->map->map_page(vq->map,
3207 					   page, offset, size,
3208 					   dir, attrs);
3209 
3210 	return dma_map_page_attrs(vring_dma_dev(vq),
3211 				  page, offset, size,
3212 				  dir, attrs);
3213 }
3214 EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs);
3215 
3216 /**
3217  * virtqueue_unmap_page_attrs - unmap a page mapped for the device
3218  * @_vq: the virtqueue we are talking to
3219  * @map_handle: the mapped address
3220  * @size: the buffer size
3221  * @dir: mapping direction
3222  * @attrs: unmapping attributes
3223  */
3224 void virtqueue_unmap_page_attrs(const struct virtqueue *_vq,
3225 				dma_addr_t map_handle,
3226 				size_t size, enum dma_data_direction dir,
3227 				unsigned long attrs)
3228 {
3229 	const struct vring_virtqueue *vq = to_vvq(_vq);
3230 	struct virtio_device *vdev = _vq->vdev;
3231 
3232 	if (vdev->map)
3233 		vdev->map->unmap_page(vq->map,
3234 				      map_handle, size, dir, attrs);
3235 	else
3236 		dma_unmap_page_attrs(vring_dma_dev(vq), map_handle,
3237 				     size, dir, attrs);
3238 }
3239 EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs);
3240 
3241 /**
3242  * virtqueue_map_single_attrs - map DMA for _vq
3243  * @_vq: the struct virtqueue we're talking about.
3244  * @ptr: the pointer of the buffer to do dma
3245  * @size: the size of the buffer to do dma
3246  * @dir: DMA direction
3247  * @attrs: DMA Attrs
3248  *
3249  * The caller calls this to do dma mapping in advance. The DMA address can be
3250  * passed to this _vq when it is in pre-mapped mode.
3251  *
3252  * Returns the mapped address. The caller should check it with virtqueue_map_mapping_error().
3253  */
3254 dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr,
3255 				      size_t size,
3256 				      enum dma_data_direction dir,
3257 				      unsigned long attrs)
3258 {
3259 	const struct vring_virtqueue *vq = to_vvq(_vq);
3260 
3261 	if (!vq->use_map_api) {
3262 		kmsan_handle_dma(virt_to_phys(ptr), size, dir);
3263 		return (dma_addr_t)virt_to_phys(ptr);
3264 	}
3265 
3266 	/* DMA must never operate on areas that might be remapped. */
3267 	if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr),
3268 			  "rejecting DMA map of vmalloc memory\n"))
3269 		return DMA_MAPPING_ERROR;
3270 
3271 	return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr),
3272 					offset_in_page(ptr), size, dir, attrs);
3273 }
3274 EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs);
3275 
3276 /**
3277  * virtqueue_unmap_single_attrs - unmap DMA for _vq
3278  * @_vq: the struct virtqueue we're talking about.
3279  * @addr: the dma address to unmap
3280  * @size: the size of the buffer
3281  * @dir: DMA direction
3282  * @attrs: DMA Attrs
3283  *
3284  * Unmap the address that is mapped by the virtqueue_map_* APIs.
3285  *
3286  */
3287 void virtqueue_unmap_single_attrs(const struct virtqueue *_vq,
3288 				  dma_addr_t addr,
3289 				  size_t size, enum dma_data_direction dir,
3290 				  unsigned long attrs)
3291 {
3292 	const struct vring_virtqueue *vq = to_vvq(_vq);
3293 
3294 	if (!vq->use_map_api)
3295 		return;
3296 
3297 	virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs);
3298 }
3299 EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs);
3300 
3301 /**
3302  * virtqueue_map_mapping_error - check dma address
3303  * @_vq: the struct virtqueue we're talking about.
3304  * @addr: DMA address
3305  *
3306  * Returns 0 if the DMA address is valid; any other value means an invalid DMA address.
3307  */
3308 int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr)
3309 {
3310 	const struct vring_virtqueue *vq = to_vvq(_vq);
3311 
3312 	return vring_mapping_error(vq, addr);
3313 }
3314 EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error);
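
/*
 * Example (illustrative sketch, not used by this file): pre-mapping a buffer
 * for a queue operating in premapped mode and unmapping it again once the
 * request has completed.  Error handling beyond the mapping check is omitted.
 */
static dma_addr_t my_map_out_buf(struct virtqueue *vq, void *buf, size_t len)
{
	dma_addr_t addr;

	addr = virtqueue_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
	if (virtqueue_map_mapping_error(vq, addr))
		return DMA_MAPPING_ERROR;

	return addr;
}

static void my_unmap_out_buf(struct virtqueue *vq, dma_addr_t addr, size_t len)
{
	virtqueue_unmap_single_attrs(vq, addr, len, DMA_TO_DEVICE, 0);
}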
3315 
3316 /**
3317  * virtqueue_map_need_sync - check whether a dma address needs sync
3318  * @_vq: the struct virtqueue we're talking about.
3319  * @addr: DMA address
3320  *
3321  * Check if the dma address mapped by the virtqueue_map_* APIs needs to be
3322  * synchronized.
3323  *
3324  * Returns true if the DMA address needs to be synchronized, false otherwise.
3325  */
3326 bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr)
3327 {
3328 	const struct vring_virtqueue *vq = to_vvq(_vq);
3329 	struct virtio_device *vdev = _vq->vdev;
3330 
3331 	if (!vq->use_map_api)
3332 		return false;
3333 
3334 	if (vdev->map)
3335 		return vdev->map->need_sync(vq->map, addr);
3336 	else
3337 		return dma_need_sync(vring_dma_dev(vq), addr);
3338 }
3339 EXPORT_SYMBOL_GPL(virtqueue_map_need_sync);
3340 
3341 /**
3342  * virtqueue_map_sync_single_range_for_cpu - sync a mapped range for CPU
3343  * @_vq: the struct virtqueue we're talking about.
3344  * @addr: DMA address
3345  * @offset: DMA address offset
3346  * @size: buf size for sync
3347  * @dir: DMA direction
3348  *
3349  * Before calling this function, use virtqueue_map_need_sync() to confirm that
3350  * the DMA address really needs to be synchronized.
3351  *
3352  */
3353 void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq,
3354 					     dma_addr_t addr,
3355 					     unsigned long offset, size_t size,
3356 					     enum dma_data_direction dir)
3357 {
3358 	const struct vring_virtqueue *vq = to_vvq(_vq);
3359 	struct virtio_device *vdev = _vq->vdev;
3360 
3361 	if (!vq->use_map_api)
3362 		return;
3363 
3364 	if (vdev->map)
3365 		vdev->map->sync_single_for_cpu(vq->map,
3366 					       addr + offset, size, dir);
3367 	else
3368 		dma_sync_single_range_for_cpu(vring_dma_dev(vq),
3369 					      addr, offset, size, dir);
3370 }
3371 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu);
3372 
3373 /**
3374  * virtqueue_map_sync_single_range_for_device - sync a mapped range for device
3375  * @_vq: the struct virtqueue we're talking about.
3376  * @addr: DMA address
3377  * @offset: DMA address offset
3378  * @size: buf size for sync
3379  * @dir: DMA direction
3380  *
3381  * Before calling this function, use virtqueue_map_need_sync() to confirm that
3382  * the DMA address really needs to be synchronized.
3383  */
3384 void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq,
3385 						dma_addr_t addr,
3386 						unsigned long offset, size_t size,
3387 						enum dma_data_direction dir)
3388 {
3389 	const struct vring_virtqueue *vq = to_vvq(_vq);
3390 	struct virtio_device *vdev = _vq->vdev;
3391 
3392 	if (!vq->use_map_api)
3393 		return;
3394 
3395 	if (vdev->map)
3396 		vdev->map->sync_single_for_device(vq->map,
3397 						  addr + offset,
3398 						  size, dir);
3399 	else
3400 		dma_sync_single_range_for_device(vring_dma_dev(vq), addr,
3401 						 offset, size, dir);
3402 }
3403 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device);
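
/*
 * Example (illustrative sketch, not used by this file): syncing a pre-mapped
 * receive buffer before the CPU reads it, but only when the platform really
 * needs the sync.
 */
static void my_sync_rx_for_cpu(struct virtqueue *vq, dma_addr_t addr,
			       size_t len)
{
	if (virtqueue_map_need_sync(vq, addr))
		virtqueue_map_sync_single_range_for_cpu(vq, addr, 0, len,
							DMA_FROM_DEVICE);
}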
3404 
3405 MODULE_DESCRIPTION("Virtio ring implementation");
3406 MODULE_LICENSE("GPL");
3407