xref: /linux/drivers/virtio/virtio_ring.c (revision 32fe1de5c12471b8c2d613003bd93d111586a10d)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17 
/*
 * Diagnostic helpers.
 *
 * With DEBUG defined, a corrupted ring is fatal (BUG()), unexpected reentry
 * into a virtqueue panics, and the time between an add and the following
 * kick/get is checked against a 100 ms budget.
 *
 * Without DEBUG, BAD_RING() only logs and marks the queue broken, and the
 * remaining helpers expand to nothing.
 *
 * Every macro parenthesises its virtqueue argument so that any pointer
 * expression may be passed.
 */
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				      (_vq)->last_add_time)) > 100); \
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
/* Production: report the problem and stop using the queue. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
69 
/*
 * Ring layout of a virtqueue: split or packed, each with an optional
 * in-order variant.
 */
enum vq_layout {
	VQ_LAYOUT_SPLIT			= 0,
	VQ_LAYOUT_PACKED		= 1,
	VQ_LAYOUT_SPLIT_IN_ORDER	= 2,
	VQ_LAYOUT_PACKED_IN_ORDER	= 3,
};
76 
77 struct vring_desc_state_split {
78 	void *data;			/* Data for callback. */
79 
80 	/* Indirect desc table and extra table, if any. These two will be
81 	 * allocated together. So we won't stress more to the memory allocator.
82 	 */
83 	struct vring_desc *indir_desc;
84 	u32 total_in_len;
85 };
86 
87 struct vring_desc_state_packed {
88 	void *data;			/* Data for callback. */
89 
90 	/* Indirect desc table and extra table, if any. These two will be
91 	 * allocated together. So we won't stress more to the memory allocator.
92 	 */
93 	struct vring_packed_desc *indir_desc;
94 	u16 num;			/* Descriptor list length. */
95 	u16 last;			/* The last desc state in a list. */
96 	u32 total_in_len;		/* In length for the skipped buffer. */
97 };
98 
99 struct vring_desc_extra {
100 	dma_addr_t addr;		/* Descriptor DMA addr. */
101 	u32 len;			/* Descriptor length. */
102 	u16 flags;			/* Descriptor flags. */
103 	u16 next;			/* The next desc state in a list. */
104 };
105 
106 struct vring_virtqueue_split {
107 	/* Actual memory layout for this queue. */
108 	struct vring vring;
109 
110 	/* Last written value to avail->flags */
111 	u16 avail_flags_shadow;
112 
113 	/*
114 	 * Last written value to avail->idx in
115 	 * guest byte order.
116 	 */
117 	u16 avail_idx_shadow;
118 
119 	/* Per-descriptor state. */
120 	struct vring_desc_state_split *desc_state;
121 	struct vring_desc_extra *desc_extra;
122 
123 	/* DMA address and size information */
124 	dma_addr_t queue_dma_addr;
125 	size_t queue_size_in_bytes;
126 
127 	/*
128 	 * The parameters for creating vrings are reserved for creating new
129 	 * vring.
130 	 */
131 	u32 vring_align;
132 	bool may_reduce_num;
133 };
134 
135 struct vring_virtqueue_packed {
136 	/* Actual memory layout for this queue. */
137 	struct {
138 		unsigned int num;
139 		struct vring_packed_desc *desc;
140 		struct vring_packed_desc_event *driver;
141 		struct vring_packed_desc_event *device;
142 	} vring;
143 
144 	/* Driver ring wrap counter. */
145 	bool avail_wrap_counter;
146 
147 	/* Avail used flags. */
148 	u16 avail_used_flags;
149 
150 	/* Index of the next avail descriptor. */
151 	u16 next_avail_idx;
152 
153 	/*
154 	 * Last written value to driver->flags in
155 	 * guest byte order.
156 	 */
157 	u16 event_flags_shadow;
158 
159 	/* Per-descriptor state. */
160 	struct vring_desc_state_packed *desc_state;
161 	struct vring_desc_extra *desc_extra;
162 
163 	/* DMA address and size information */
164 	dma_addr_t ring_dma_addr;
165 	dma_addr_t driver_event_dma_addr;
166 	dma_addr_t device_event_dma_addr;
167 	size_t ring_size_in_bytes;
168 	size_t event_size_in_bytes;
169 };
170 
171 struct vring_virtqueue;
172 
173 struct virtqueue_ops {
174 	int (*add)(struct vring_virtqueue *vq, struct scatterlist *sgs[],
175 		   unsigned int total_sg, unsigned int out_sgs,
176 		   unsigned int in_sgs,	void *data,
177 		   void *ctx, bool premapped, gfp_t gfp,
178 		   unsigned long attr);
179 	void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx);
180 	bool (*kick_prepare)(struct vring_virtqueue *vq);
181 	void (*disable_cb)(struct vring_virtqueue *vq);
182 	bool (*enable_cb_delayed)(struct vring_virtqueue *vq);
183 	unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq);
184 	bool (*poll)(const struct vring_virtqueue *vq,
185 		     unsigned int last_used_idx);
186 	void *(*detach_unused_buf)(struct vring_virtqueue *vq);
187 	bool (*more_used)(const struct vring_virtqueue *vq);
188 	int (*resize)(struct vring_virtqueue *vq, u32 num);
189 	void (*reset)(struct vring_virtqueue *vq);
190 };
191 
192 struct vring_virtqueue {
193 	struct virtqueue vq;
194 
195 	/* Is DMA API used? */
196 	bool use_map_api;
197 
198 	/* Can we use weak barriers? */
199 	bool weak_barriers;
200 
201 	/* Other side has made a mess, don't try any more. */
202 	bool broken;
203 
204 	/* Host supports indirect buffers */
205 	bool indirect;
206 
207 	/* Host publishes avail event idx */
208 	bool event;
209 
210 	enum vq_layout layout;
211 
212 	/*
213 	 * Without IN_ORDER it's the head of free buffer list. With
214 	 * IN_ORDER and SPLIT, it's the next available buffer
215 	 * index. With IN_ORDER and PACKED, it's unused.
216 	 */
217 	unsigned int free_head;
218 
219 	/*
220 	 * With IN_ORDER, once we see an in-order batch, this stores
221 	 * this last entry, and until we return the last buffer.
222 	 * After this, id is set to UINT_MAX to mark it invalid.
223 	 * Unused without IN_ORDER.
224 	 */
225 	struct used_entry {
226 		u32 id;
227 		u32 len;
228 	} batch_last;
229 
230 	/* Number we've added since last sync. */
231 	unsigned int num_added;
232 
233 	/* Last used index  we've seen.
234 	 * for split ring, it just contains last used index
235 	 * for packed ring:
236 	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
237 	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
238 	 */
239 	u16 last_used_idx;
240 
241 	/* With IN_ORDER and SPLIT, last descriptor id we used to
242 	 * detach buffer.
243 	 */
244 	u16 last_used;
245 
246 	/* Hint for event idx: already triggered no need to disable. */
247 	bool event_triggered;
248 
249 	union {
250 		/* Available for split ring */
251 		struct vring_virtqueue_split split;
252 
253 		/* Available for packed ring */
254 		struct vring_virtqueue_packed packed;
255 	};
256 
257 	/* How to notify other side. FIXME: commonalize hcalls! */
258 	bool (*notify)(struct virtqueue *vq);
259 
260 	/* DMA, allocation, and size information */
261 	bool we_own_ring;
262 
263 	union virtio_map map;
264 
265 #ifdef DEBUG
266 	/* They're supposed to lock for us. */
267 	unsigned int in_use;
268 
269 	/* Figure out if their kicks are too delayed. */
270 	bool last_add_time_valid;
271 	ktime_t last_add_time;
272 #endif
273 };
274 
275 /*
276  * Accessors for device-writable fields in virtio rings.
277  * These fields are concurrently written by the device and read by the driver.
278  * Use READ_ONCE() to prevent compiler optimizations, document the
279  * intentional data race and prevent KCSAN warnings.
280  */
281 static inline u16 vring_read_split_used_idx(const struct vring_virtqueue *vq)
282 {
283 	return virtio16_to_cpu(vq->vq.vdev,
284 			       READ_ONCE(vq->split.vring.used->idx));
285 }
286 
287 static inline u32 vring_read_split_used_id(const struct vring_virtqueue *vq,
288 					   u16 idx)
289 {
290 	return virtio32_to_cpu(vq->vq.vdev,
291 			       READ_ONCE(vq->split.vring.used->ring[idx].id));
292 }
293 
294 static inline u32 vring_read_split_used_len(const struct vring_virtqueue *vq, u16 idx)
295 {
296 	return virtio32_to_cpu(vq->vq.vdev,
297 			       READ_ONCE(vq->split.vring.used->ring[idx].len));
298 }
299 
300 static inline u16 vring_read_split_avail_event(const struct vring_virtqueue *vq)
301 {
302 	return virtio16_to_cpu(vq->vq.vdev,
303 			       READ_ONCE(vring_avail_event(&vq->split.vring)));
304 }
305 
306 static inline u16 vring_read_packed_desc_flags(const struct vring_virtqueue *vq,
307 					       u16 idx)
308 {
309 	return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags));
310 }
311 
312 static inline u16 vring_read_packed_desc_id(const struct vring_virtqueue *vq,
313 				            u16 idx)
314 {
315 	return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id));
316 }
317 
318 static inline u32 vring_read_packed_desc_len(const struct vring_virtqueue *vq,
319 				             u16 idx)
320 {
321 	return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len));
322 }
323 
324 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
325 static void vring_free(struct virtqueue *_vq);
326 
327 /*
328  * Helpers.
329  */
330 
/*
 * Convert a pointer to the embedded struct virtqueue (member "vq") into a
 * pointer to its containing struct vring_virtqueue.
 */
#define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
332 
333 
334 static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq)
335 {
336 	return vq->layout == VQ_LAYOUT_PACKED ||
337 	       vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
338 }
339 
340 static inline bool virtqueue_is_in_order(const struct vring_virtqueue *vq)
341 {
342 	return vq->layout == VQ_LAYOUT_SPLIT_IN_ORDER ||
343 	       vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
344 }
345 
346 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
347 				   unsigned int total_sg)
348 {
349 	/*
350 	 * If the host supports indirect descriptor tables, and we have multiple
351 	 * buffers, then go indirect. FIXME: tune this threshold
352 	 */
353 	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
354 }
355 
356 /*
357  * Modern virtio devices have feature bits to specify whether they need a
358  * quirk and bypass the IOMMU. If not there, just use the DMA API.
359  *
360  * If there, the interaction between virtio and DMA API is messy.
361  *
362  * On most systems with virtio, physical addresses match bus addresses,
363  * and it doesn't particularly matter whether we use the DMA API.
364  *
365  * On some systems, including Xen and any system with a physical device
366  * that speaks virtio behind a physical IOMMU, we must use the DMA API
367  * for virtio DMA to work at all.
368  *
369  * On other systems, including SPARC and PPC64, virtio-pci devices are
370  * enumerated as though they are behind an IOMMU, but the virtio host
371  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
372  * there or somehow map everything as the identity.
373  *
374  * For the time being, we preserve historic behavior and bypass the DMA
375  * API.
376  *
377  * TODO: install a per-device DMA ops structure that does the right thing
378  * taking into account all the above quirks, and use the DMA API
379  * unconditionally on data path.
380  */
381 
382 static bool vring_use_map_api(const struct virtio_device *vdev)
383 {
384 	if (!virtio_has_dma_quirk(vdev))
385 		return true;
386 
387 	/* Otherwise, we are left to guess. */
388 	/*
389 	 * In theory, it's possible to have a buggy QEMU-supposed
390 	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
391 	 * such a configuration, virtio has never worked and will
392 	 * not work without an even larger kludge.  Instead, enable
393 	 * the DMA API if we're a Xen guest, which at least allows
394 	 * all of the sensible Xen configurations to work correctly.
395 	 */
396 	if (xen_domain())
397 		return true;
398 
399 	return false;
400 }
401 
402 static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
403 				    const struct vring_desc_extra *extra)
404 {
405 	return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR);
406 }
407 
408 size_t virtio_max_dma_size(const struct virtio_device *vdev)
409 {
410 	size_t max_segment_size = SIZE_MAX;
411 
412 	if (vring_use_map_api(vdev)) {
413 		if (vdev->map) {
414 			max_segment_size =
415 				vdev->map->max_mapping_size(vdev->vmap);
416 		} else
417 			max_segment_size =
418 				dma_max_mapping_size(vdev->dev.parent);
419 	}
420 
421 	return max_segment_size;
422 }
423 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
424 
425 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
426 			       dma_addr_t *map_handle, gfp_t flag,
427 			       union virtio_map map)
428 {
429 	if (vring_use_map_api(vdev)) {
430 		return virtqueue_map_alloc_coherent(vdev, map, size,
431 						    map_handle, flag);
432 	} else {
433 		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
434 
435 		if (queue) {
436 			phys_addr_t phys_addr = virt_to_phys(queue);
437 			*map_handle = (dma_addr_t)phys_addr;
438 
439 			/*
440 			 * Sanity check: make sure we dind't truncate
441 			 * the address.  The only arches I can find that
442 			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
443 			 * are certain non-highmem MIPS and x86
444 			 * configurations, but these configurations
445 			 * should never allocate physical pages above 32
446 			 * bits, so this is fine.  Just in case, throw a
447 			 * warning and abort if we end up with an
448 			 * unrepresentable address.
449 			 */
450 			if (WARN_ON_ONCE(*map_handle != phys_addr)) {
451 				free_pages_exact(queue, PAGE_ALIGN(size));
452 				return NULL;
453 			}
454 		}
455 		return queue;
456 	}
457 }
458 
459 static void vring_free_queue(struct virtio_device *vdev, size_t size,
460 			     void *queue, dma_addr_t map_handle,
461 			     union virtio_map map)
462 {
463 	if (vring_use_map_api(vdev))
464 		virtqueue_map_free_coherent(vdev, map, size,
465 					    queue, map_handle);
466 	else
467 		free_pages_exact(queue, PAGE_ALIGN(size));
468 }
469 
470 /*
471  * The DMA ops on various arches are rather gnarly right now, and
472  * making all of the arch DMA ops work on the vring device itself
473  * is a mess.
474  */
475 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
476 {
477 	return vq->map.dma_dev;
478 }
479 
480 static int vring_mapping_error(const struct vring_virtqueue *vq,
481 			       dma_addr_t addr)
482 {
483 	struct virtio_device *vdev = vq->vq.vdev;
484 
485 	if (!vq->use_map_api)
486 		return 0;
487 
488 	if (vdev->map)
489 		return vdev->map->mapping_error(vq->map, addr);
490 	else
491 		return dma_mapping_error(vring_dma_dev(vq), addr);
492 }
493 
494 /* Map one sg entry. */
495 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
496 			    enum dma_data_direction direction, dma_addr_t *addr,
497 			    u32 *len, bool premapped, unsigned long attr)
498 {
499 	if (premapped) {
500 		*addr = sg_dma_address(sg);
501 		*len = sg_dma_len(sg);
502 		return 0;
503 	}
504 
505 	*len = sg->length;
506 
507 	if (!vq->use_map_api) {
508 		/*
509 		 * If DMA is not used, KMSAN doesn't know that the scatterlist
510 		 * is initialized by the hardware. Explicitly check/unpoison it
511 		 * depending on the direction.
512 		 */
513 		kmsan_handle_dma(sg_phys(sg), sg->length, direction);
514 		*addr = (dma_addr_t)sg_phys(sg);
515 		return 0;
516 	}
517 
518 	/*
519 	 * We can't use dma_map_sg, because we don't use scatterlists in
520 	 * the way it expects (we don't guarantee that the scatterlist
521 	 * will exist for the lifetime of the mapping).
522 	 */
523 	*addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg),
524 					 sg->offset, sg->length,
525 					 direction, attr);
526 
527 	if (vring_mapping_error(vq, *addr))
528 		return -ENOMEM;
529 
530 	return 0;
531 }
532 
533 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
534 				   void *cpu_addr, size_t size,
535 				   enum dma_data_direction direction)
536 {
537 	if (!vq->use_map_api)
538 		return (dma_addr_t)virt_to_phys(cpu_addr);
539 
540 	return virtqueue_map_single_attrs(&vq->vq, cpu_addr,
541 					  size, direction, 0);
542 }
543 
544 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
545 {
546 	vq->vq.num_free = num;
547 
548 	if (virtqueue_is_packed(vq))
549 		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
550 	else
551 		vq->last_used_idx = 0;
552 
553 	vq->last_used = 0;
554 
555 	vq->event_triggered = false;
556 	vq->num_added = 0;
557 
558 #ifdef DEBUG
559 	vq->in_use = false;
560 	vq->last_add_time_valid = false;
561 #endif
562 }
563 
564 
565 /*
566  * Split ring specific functions - *_split().
567  */
568 
569 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
570 					  struct vring_desc_extra *extra)
571 {
572 	u16 flags;
573 
574 	flags = extra->flags;
575 
576 	if (flags & VRING_DESC_F_INDIRECT) {
577 		if (!vq->use_map_api)
578 			goto out;
579 	} else if (!vring_need_unmap_buffer(vq, extra))
580 		goto out;
581 
582 	virtqueue_unmap_page_attrs(&vq->vq,
583 				   extra->addr,
584 				   extra->len,
585 				   (flags & VRING_DESC_F_WRITE) ?
586 				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
587 				   0);
588 
589 out:
590 	return extra->next;
591 }
592 
593 static struct vring_desc *alloc_indirect_split(struct vring_virtqueue *vq,
594 					       unsigned int total_sg,
595 					       gfp_t gfp)
596 {
597 	struct vring_desc_extra *extra;
598 	struct vring_desc *desc;
599 	unsigned int i, size;
600 
601 	/*
602 	 * We require lowmem mappings for the descriptors because
603 	 * otherwise virt_to_phys will give us bogus addresses in the
604 	 * virtqueue.
605 	 */
606 	gfp &= ~__GFP_HIGHMEM;
607 
608 	size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg;
609 
610 	desc = kmalloc(size, gfp);
611 	if (!desc)
612 		return NULL;
613 
614 	extra = (struct vring_desc_extra *)&desc[total_sg];
615 
616 	for (i = 0; i < total_sg; i++)
617 		extra[i].next = i + 1;
618 
619 	return desc;
620 }
621 
622 static inline unsigned int virtqueue_add_desc_split(struct vring_virtqueue *vq,
623 						    struct vring_desc *desc,
624 						    struct vring_desc_extra *extra,
625 						    unsigned int i,
626 						    dma_addr_t addr,
627 						    unsigned int len,
628 						    u16 flags, bool premapped)
629 {
630 	struct virtio_device *vdev = vq->vq.vdev;
631 	u16 next;
632 
633 	desc[i].flags = cpu_to_virtio16(vdev, flags);
634 	desc[i].addr = cpu_to_virtio64(vdev, addr);
635 	desc[i].len = cpu_to_virtio32(vdev, len);
636 
637 	extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
638 	extra[i].len = len;
639 	extra[i].flags = flags;
640 
641 	next = extra[i].next;
642 
643 	desc[i].next = cpu_to_virtio16(vdev, next);
644 
645 	return next;
646 }
647 
/*
 * Add one buffer to a split ring.
 *
 * @sgs holds @out_sgs device-readable scatterlists followed by @in_sgs
 * device-writable ones, @total_sg entries in total. @data is the token
 * stored for the buffer and must not be NULL. @ctx is stored alongside it
 * when no indirect table is used; passing @ctx on a queue with indirect
 * support is a BUG. @premapped entries are used as already mapped.
 *
 * Returns 0 on success, -EIO if the queue is broken, -ENOSPC if there are
 * not enough free descriptors, or -ENOMEM if a mapping fails (everything
 * mapped so far is unmapped again).
 */
static inline int virtqueue_add_split(struct vring_virtqueue *vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      bool premapped,
				      gfp_t gfp,
				      unsigned long attr)
{
	struct vring_desc_extra *extra;
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, err_idx, sg_count = 0;
	/* Total length for in-order */
	unsigned int total_in_len = 0;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	/*
	 * Try an indirect table first when eligible. A failed allocation
	 * leaves desc NULL, which falls back to direct descriptors below.
	 */
	if (virtqueue_use_indirect(vq, total_sg))
		desc = alloc_indirect_split(vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
		/* The table's extra array follows its descriptors. */
		extra = (struct vring_desc_extra *)&desc[total_sg];
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		extra = vq->split.desc_extra;
		i = head;
		descs_used = total_sg;
	}

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	/* Device-readable segments first. */
	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;
			u16 flags = 0;

			/* Every descriptor but the last chains to the next. */
			if (++sg_count != total_sg)
				flags |= VRING_DESC_F_NEXT;

			if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len,
					     premapped, attr))
				goto unmap_release;

			/* Note that we trust indirect descriptor
			 * table since it use stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
						     len, flags, premapped);
		}
	}
	/* Then the device-writable segments. */
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;
			u16 flags = VRING_DESC_F_WRITE;

			if (++sg_count != total_sg)
				flags |= VRING_DESC_F_NEXT;

			if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len,
					     premapped, attr))
				goto unmap_release;

			/* Note that we trust indirect descriptor
			 * table since it use stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
						     len, flags, premapped);
			total_in_len += len;
		}
	}

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		/* The ring's head descriptor points at the indirect table. */
		virtqueue_add_desc_split(vq, vq->split.vring.desc,
					 vq->split.desc_extra,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT, false);
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (virtqueue_is_in_order(vq)) {
		/* In-order: free_head just advances, wrapping at ring size. */
		vq->free_head += descs_used;
		if (vq->free_head >= vq->split.vring.num)
			vq->free_head -= vq->split.vring.num;
		vq->split.desc_state[head].total_in_len = total_in_len;
	} else if (indirect)
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(vq->vq.vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev,
						vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(&vq->vq);

	return 0;

unmap_release:
	/* i is the first descriptor that was NOT filled in. */
	err_idx = i;

	/* Restart from the first descriptor this call wrote. */
	if (indirect)
		i = 0;
	else
		i = head;

	/* Unmap everything written before the failure point. */
	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;

		i = vring_unmap_one_split(vq, &extra[i]);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
842 
843 static bool virtqueue_kick_prepare_split(struct vring_virtqueue *vq)
844 {
845 	u16 new, old;
846 	bool needs_kick;
847 
848 	START_USE(vq);
849 	/* We need to expose available array entries before checking avail
850 	 * event. */
851 	virtio_mb(vq->weak_barriers);
852 
853 	old = vq->split.avail_idx_shadow - vq->num_added;
854 	new = vq->split.avail_idx_shadow;
855 	vq->num_added = 0;
856 
857 	LAST_ADD_TIME_CHECK(vq);
858 	LAST_ADD_TIME_INVALID(vq);
859 
860 	if (vq->event) {
861 		needs_kick = vring_need_event(vring_read_split_avail_event(vq),
862 					      new, old);
863 	} else {
864 		needs_kick = !(vq->split.vring.used->flags &
865 					cpu_to_virtio16(vq->vq.vdev,
866 						VRING_USED_F_NO_NOTIFY));
867 	}
868 	END_USE(vq);
869 	return needs_kick;
870 }
871 
872 static void detach_indirect_split(struct vring_virtqueue *vq,
873 				  unsigned int head)
874 {
875 	struct vring_desc_extra *extra = vq->split.desc_extra;
876 	struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc;
877 	unsigned int j;
878 	u32 len, num;
879 
880 	/* Free the indirect table, if any, now that it's unmapped. */
881 	if (!indir_desc)
882 		return;
883 	len = vq->split.desc_extra[head].len;
884 
885 	BUG_ON(!(vq->split.desc_extra[head].flags &
886 			VRING_DESC_F_INDIRECT));
887 	BUG_ON(len == 0 || len % sizeof(struct vring_desc));
888 
889 	num = len / sizeof(struct vring_desc);
890 
891 	extra = (struct vring_desc_extra *)&indir_desc[num];
892 
893 	if (vq->use_map_api) {
894 		for (j = 0; j < num; j++)
895 			vring_unmap_one_split(vq, &extra[j]);
896 	}
897 
898 	kfree(indir_desc);
899 	vq->split.desc_state[head].indir_desc = NULL;
900 }
901 
902 static unsigned detach_buf_split_in_order(struct vring_virtqueue *vq,
903 					  unsigned int head,
904 					  void **ctx)
905 {
906 	struct vring_desc_extra *extra;
907 	unsigned int i;
908 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
909 
910 	/* Clear data ptr. */
911 	vq->split.desc_state[head].data = NULL;
912 
913 	extra = vq->split.desc_extra;
914 
915 	/* Put back on free list: unmap first-level descriptors and find end */
916 	i = head;
917 
918 	while (vq->split.vring.desc[i].flags & nextflag) {
919 		i = vring_unmap_one_split(vq, &extra[i]);
920 		vq->vq.num_free++;
921 	}
922 
923 	vring_unmap_one_split(vq, &extra[i]);
924 
925 	/* Plus final descriptor */
926 	vq->vq.num_free++;
927 
928 	if (vq->indirect)
929 		detach_indirect_split(vq, head);
930 	else if (ctx)
931 		*ctx = vq->split.desc_state[head].indir_desc;
932 
933 	return i;
934 }
935 
936 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
937 			     void **ctx)
938 {
939 	unsigned int i = detach_buf_split_in_order(vq, head, ctx);
940 
941 	vq->split.desc_extra[i].next = vq->free_head;
942 	vq->free_head = head;
943 }
944 
945 static bool virtqueue_poll_split(const struct vring_virtqueue *vq,
946 				 unsigned int last_used_idx)
947 {
948 	return (u16)last_used_idx != vring_read_split_used_idx(vq);
949 }
950 
951 static bool more_used_split(const struct vring_virtqueue *vq)
952 {
953 	return virtqueue_poll_split(vq, vq->last_used_idx);
954 }
955 
956 static bool more_used_split_in_order(const struct vring_virtqueue *vq)
957 {
958 	if (vq->batch_last.id != UINT_MAX)
959 		return true;
960 
961 	return virtqueue_poll_split(vq, vq->last_used_idx);
962 }
963 
964 static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq,
965 					 unsigned int *len,
966 					 void **ctx)
967 {
968 	void *ret;
969 	unsigned int i;
970 	u16 last_used;
971 
972 	START_USE(vq);
973 
974 	if (unlikely(vq->broken)) {
975 		END_USE(vq);
976 		return NULL;
977 	}
978 
979 	if (!more_used_split(vq)) {
980 		pr_debug("No more buffers in queue\n");
981 		END_USE(vq);
982 		return NULL;
983 	}
984 
985 	/* Only get used array entries after they have been exposed by host. */
986 	virtio_rmb(vq->weak_barriers);
987 
988 	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
989 	i = vring_read_split_used_id(vq, last_used);
990 	*len = vring_read_split_used_len(vq, last_used);
991 
992 	if (unlikely(i >= vq->split.vring.num)) {
993 		BAD_RING(vq, "id %u out of range\n", i);
994 		return NULL;
995 	}
996 	if (unlikely(!vq->split.desc_state[i].data)) {
997 		BAD_RING(vq, "id %u is not a head!\n", i);
998 		return NULL;
999 	}
1000 
1001 	/* detach_buf_split clears data, so grab it now. */
1002 	ret = vq->split.desc_state[i].data;
1003 	detach_buf_split(vq, i, ctx);
1004 	vq->last_used_idx++;
1005 	/* If we expect an interrupt for the next entry, tell host
1006 	 * by writing event index and flush out the write before
1007 	 * the read in the next get_buf call. */
1008 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
1009 		virtio_store_mb(vq->weak_barriers,
1010 				&vring_used_event(&vq->split.vring),
1011 				cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx));
1012 
1013 	LAST_ADD_TIME_INVALID(vq);
1014 
1015 	END_USE(vq);
1016 	return ret;
1017 }
1018 
/*
 * Fetch the next completed buffer from a split ring that is used in order.
 *
 * @vq:  the virtqueue to take a buffer from
 * @len: written with the length associated with the returned buffer
 * @ctx: forwarded unchanged to detach_buf_split_in_order()
 *
 * Returns the token stored in desc_state[].data for the buffer, or NULL
 * when the queue is marked broken, when no used entry is pending, or when
 * the slot at the current position holds no token.
 */
static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq,
						  unsigned int *len,
						  void **ctx)
{
	void *ret;
	unsigned int num = vq->split.vring.num;
	/* Snapshot, so the number of descriptors freed below can be measured. */
	unsigned int num_free = vq->vq.num_free;
	u16 last_used, last_used_idx;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	/* Reduce both counters to ring slots; assumes num is a power of two. */
	last_used = vq->last_used & (num - 1);
	last_used_idx = vq->last_used_idx & (num - 1);

	/* UINT_MAX in batch_last.id means no used entry is cached. */
	if (vq->batch_last.id == UINT_MAX) {
		if (!more_used_split_in_order(vq)) {
			pr_debug("No more buffers in queue\n");
			END_USE(vq);
			return NULL;
		}

		/*
		 * Only get used array entries after they have been
		 * exposed by host.
		 */
		virtio_rmb(vq->weak_barriers);

		/* Cache the id/len pair read from the used ring slot. */
		vq->batch_last.id = vring_read_split_used_id(vq, last_used_idx);
		vq->batch_last.len = vring_read_split_used_len(vq, last_used_idx);
	}

	if (vq->batch_last.id == last_used) {
		/* The cached entry names this buffer: consume it. */
		vq->batch_last.id = UINT_MAX;
		*len = vq->batch_last.len;
	} else {
		/* Otherwise report the length recorded in desc_state. */
		*len = vq->split.desc_state[last_used].total_in_len;
	}

	if (unlikely(!vq->split.desc_state[last_used].data)) {
		BAD_RING(vq, "id %u is not a head!\n", last_used);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[last_used].data;
	detach_buf_split_in_order(vq, last_used, ctx);

	vq->last_used_idx++;
	/* Advance by however many descriptors the detach just freed. */
	vq->last_used += (vq->vq.num_free - num_free);
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
1086 
1087 static void virtqueue_disable_cb_split(struct vring_virtqueue *vq)
1088 {
1089 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
1090 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1091 
1092 		/*
1093 		 * If device triggered an event already it won't trigger one again:
1094 		 * no need to disable.
1095 		 */
1096 		if (vq->event_triggered)
1097 			return;
1098 
1099 		if (vq->event)
1100 			/* TODO: this is a hack. Figure out a cleaner value to write. */
1101 			vring_used_event(&vq->split.vring) = 0x0;
1102 		else
1103 			vq->split.vring.avail->flags =
1104 				cpu_to_virtio16(vq->vq.vdev,
1105 						vq->split.avail_flags_shadow);
1106 	}
1107 }
1108 
1109 static unsigned int virtqueue_enable_cb_prepare_split(struct vring_virtqueue *vq)
1110 {
1111 	u16 last_used_idx;
1112 
1113 	START_USE(vq);
1114 
1115 	/* We optimistically turn back on interrupts, then check if there was
1116 	 * more to do. */
1117 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
1118 	 * either clear the flags bit or point the event index at the next
1119 	 * entry. Always do both to keep code simple. */
1120 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
1121 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
1122 		if (!vq->event)
1123 			vq->split.vring.avail->flags =
1124 				cpu_to_virtio16(vq->vq.vdev,
1125 						vq->split.avail_flags_shadow);
1126 	}
1127 	vring_used_event(&vq->split.vring) = cpu_to_virtio16(vq->vq.vdev,
1128 			last_used_idx = vq->last_used_idx);
1129 	END_USE(vq);
1130 	return last_used_idx;
1131 }
1132 
1133 static bool virtqueue_enable_cb_delayed_split(struct vring_virtqueue *vq)
1134 {
1135 	u16 bufs;
1136 
1137 	START_USE(vq);
1138 
1139 	/* We optimistically turn back on interrupts, then check if there was
1140 	 * more to do. */
1141 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
1142 	 * either clear the flags bit or point the event index at the next
1143 	 * entry. Always update the event index to keep code simple. */
1144 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
1145 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
1146 		if (!vq->event)
1147 			vq->split.vring.avail->flags =
1148 				cpu_to_virtio16(vq->vq.vdev,
1149 						vq->split.avail_flags_shadow);
1150 	}
1151 	/* TODO: tune this threshold */
1152 	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
1153 
1154 	virtio_store_mb(vq->weak_barriers,
1155 			&vring_used_event(&vq->split.vring),
1156 			cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx + bufs));
1157 
1158 	if (unlikely((u16)(vring_read_split_used_idx(vq)
1159 					- vq->last_used_idx) > bufs)) {
1160 		END_USE(vq);
1161 		return false;
1162 	}
1163 
1164 	END_USE(vq);
1165 	return true;
1166 }
1167 
1168 static void *virtqueue_detach_unused_buf_split(struct vring_virtqueue *vq)
1169 {
1170 	unsigned int i;
1171 	void *buf;
1172 
1173 	START_USE(vq);
1174 
1175 	for (i = 0; i < vq->split.vring.num; i++) {
1176 		if (!vq->split.desc_state[i].data)
1177 			continue;
1178 		/* detach_buf_split clears data, so grab it now. */
1179 		buf = vq->split.desc_state[i].data;
1180 		if (virtqueue_is_in_order(vq))
1181 			detach_buf_split_in_order(vq, i, NULL);
1182 		else
1183 			detach_buf_split(vq, i, NULL);
1184 		vq->split.avail_idx_shadow--;
1185 		vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev,
1186 				vq->split.avail_idx_shadow);
1187 		END_USE(vq);
1188 		return buf;
1189 	}
1190 	/* That should have freed everything. */
1191 	BUG_ON(vq->vq.num_free != vq->split.vring.num);
1192 
1193 	END_USE(vq);
1194 	return NULL;
1195 }
1196 
1197 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
1198 				       struct vring_virtqueue *vq)
1199 {
1200 	struct virtio_device *vdev;
1201 
1202 	vdev = vq->vq.vdev;
1203 
1204 	vring_split->avail_flags_shadow = 0;
1205 	vring_split->avail_idx_shadow = 0;
1206 
1207 	/* No callback?  Tell other side not to bother us. */
1208 	if (!vq->vq.callback) {
1209 		vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1210 		if (!vq->event)
1211 			vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1212 					vring_split->avail_flags_shadow);
1213 	}
1214 }
1215 
1216 static void virtqueue_reset_split(struct vring_virtqueue *vq)
1217 {
1218 	int num;
1219 
1220 	num = vq->split.vring.num;
1221 
1222 	vq->split.vring.avail->flags = 0;
1223 	vq->split.vring.avail->idx = 0;
1224 
1225 	/* reset avail event */
1226 	vq->split.vring.avail->ring[num] = 0;
1227 
1228 	vq->split.vring.used->flags = 0;
1229 	vq->split.vring.used->idx = 0;
1230 
1231 	/* reset used event */
1232 	*(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1233 
1234 	virtqueue_init(vq, num);
1235 
1236 	virtqueue_vring_init_split(&vq->split, vq);
1237 }
1238 
1239 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1240 					 struct vring_virtqueue_split *vring_split)
1241 {
1242 	vq->split = *vring_split;
1243 
1244 	/* Put everything in free lists. */
1245 	vq->free_head = 0;
1246 	vq->batch_last.id = UINT_MAX;
1247 }
1248 
1249 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1250 {
1251 	struct vring_desc_state_split *state;
1252 	struct vring_desc_extra *extra;
1253 	u32 num = vring_split->vring.num;
1254 
1255 	state = kmalloc_objs(struct vring_desc_state_split, num);
1256 	if (!state)
1257 		goto err_state;
1258 
1259 	extra = vring_alloc_desc_extra(num);
1260 	if (!extra)
1261 		goto err_extra;
1262 
1263 	memset(state, 0, num * sizeof(struct vring_desc_state_split));
1264 
1265 	vring_split->desc_state = state;
1266 	vring_split->desc_extra = extra;
1267 	return 0;
1268 
1269 err_extra:
1270 	kfree(state);
1271 err_state:
1272 	return -ENOMEM;
1273 }
1274 
1275 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1276 			     struct virtio_device *vdev,
1277 			     union virtio_map map)
1278 {
1279 	vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1280 			 vring_split->vring.desc,
1281 			 vring_split->queue_dma_addr,
1282 			 map);
1283 
1284 	kfree(vring_split->desc_state);
1285 	kfree(vring_split->desc_extra);
1286 }
1287 
1288 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1289 				   struct virtio_device *vdev,
1290 				   u32 num,
1291 				   unsigned int vring_align,
1292 				   bool may_reduce_num,
1293 				   union virtio_map map)
1294 {
1295 	void *queue = NULL;
1296 	dma_addr_t dma_addr;
1297 
1298 	/* We assume num is a power of 2. */
1299 	if (!is_power_of_2(num)) {
1300 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1301 		return -EINVAL;
1302 	}
1303 
1304 	/* TODO: allocate each queue chunk individually */
1305 	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1306 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1307 					  &dma_addr,
1308 					  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1309 					  map);
1310 		if (queue)
1311 			break;
1312 		if (!may_reduce_num)
1313 			return -ENOMEM;
1314 	}
1315 
1316 	if (!num)
1317 		return -ENOMEM;
1318 
1319 	if (!queue) {
1320 		/* Try to get a single page. You are my only hope! */
1321 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1322 					  &dma_addr, GFP_KERNEL | __GFP_ZERO,
1323 					  map);
1324 	}
1325 	if (!queue)
1326 		return -ENOMEM;
1327 
1328 	vring_init(&vring_split->vring, num, queue, vring_align);
1329 
1330 	vring_split->queue_dma_addr = dma_addr;
1331 	vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1332 
1333 	vring_split->vring_align = vring_align;
1334 	vring_split->may_reduce_num = may_reduce_num;
1335 
1336 	return 0;
1337 }
1338 
/*
 * Declaration without initializer, so that code placed before the full
 * definition can name split_ops. NOTE(review): the initialized definition
 * is presumably further down in this file - confirm.
 */
static const struct virtqueue_ops split_ops;
1340 
1341 static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
1342 					       struct vring_virtqueue_split *vring_split,
1343 					       struct virtio_device *vdev,
1344 					       bool weak_barriers,
1345 					       bool context,
1346 					       bool (*notify)(struct virtqueue *),
1347 					       void (*callback)(struct virtqueue *),
1348 					       const char *name,
1349 					       union virtio_map map)
1350 {
1351 	struct vring_virtqueue *vq;
1352 	int err;
1353 
1354 	vq = kmalloc_obj(*vq);
1355 	if (!vq)
1356 		return NULL;
1357 
1358 	vq->vq.callback = callback;
1359 	vq->vq.vdev = vdev;
1360 	vq->vq.name = name;
1361 	vq->vq.index = index;
1362 	vq->vq.reset = false;
1363 	vq->we_own_ring = false;
1364 	vq->notify = notify;
1365 	vq->weak_barriers = weak_barriers;
1366 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
1367 	vq->broken = true;
1368 #else
1369 	vq->broken = false;
1370 #endif
1371 	vq->map = map;
1372 	vq->use_map_api = vring_use_map_api(vdev);
1373 
1374 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1375 		!context;
1376 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1377 	vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ?
1378 		     VQ_LAYOUT_SPLIT_IN_ORDER : VQ_LAYOUT_SPLIT;
1379 
1380 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1381 		vq->weak_barriers = false;
1382 
1383 	err = vring_alloc_state_extra_split(vring_split);
1384 	if (err) {
1385 		kfree(vq);
1386 		return NULL;
1387 	}
1388 
1389 	virtqueue_vring_init_split(vring_split, vq);
1390 
1391 	virtqueue_init(vq, vring_split->vring.num);
1392 	virtqueue_vring_attach_split(vq, vring_split);
1393 
1394 	spin_lock(&vdev->vqs_list_lock);
1395 	list_add_tail(&vq->vq.list, &vdev->vqs);
1396 	spin_unlock(&vdev->vqs_list_lock);
1397 	return &vq->vq;
1398 }
1399 
1400 static struct virtqueue *vring_create_virtqueue_split(
1401 	unsigned int index,
1402 	unsigned int num,
1403 	unsigned int vring_align,
1404 	struct virtio_device *vdev,
1405 	bool weak_barriers,
1406 	bool may_reduce_num,
1407 	bool context,
1408 	bool (*notify)(struct virtqueue *),
1409 	void (*callback)(struct virtqueue *),
1410 	const char *name,
1411 	union virtio_map map)
1412 {
1413 	struct vring_virtqueue_split vring_split = {};
1414 	struct virtqueue *vq;
1415 	int err;
1416 
1417 	err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1418 				      may_reduce_num, map);
1419 	if (err)
1420 		return NULL;
1421 
1422 	vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
1423 				   context, notify, callback, name, map);
1424 	if (!vq) {
1425 		vring_free_split(&vring_split, vdev, map);
1426 		return NULL;
1427 	}
1428 
1429 	to_vvq(vq)->we_own_ring = true;
1430 
1431 	return vq;
1432 }
1433 
1434 static int virtqueue_resize_split(struct vring_virtqueue *vq, u32 num)
1435 {
1436 	struct vring_virtqueue_split vring_split = {};
1437 	struct virtio_device *vdev = vq->vq.vdev;
1438 	int err;
1439 
1440 	err = vring_alloc_queue_split(&vring_split, vdev, num,
1441 				      vq->split.vring_align,
1442 				      vq->split.may_reduce_num,
1443 				      vq->map);
1444 	if (err)
1445 		goto err;
1446 
1447 	err = vring_alloc_state_extra_split(&vring_split);
1448 	if (err)
1449 		goto err_state_extra;
1450 
1451 	vring_free(&vq->vq);
1452 
1453 	virtqueue_vring_init_split(&vring_split, vq);
1454 
1455 	virtqueue_init(vq, vring_split.vring.num);
1456 	virtqueue_vring_attach_split(vq, &vring_split);
1457 
1458 	return 0;
1459 
1460 err_state_extra:
1461 	vring_free_split(&vring_split, vdev, vq->map);
1462 err:
1463 	virtqueue_reset_split(vq);
1464 	return -ENOMEM;
1465 }
1466 
1467 
1468 /*
1469  * Packed ring specific functions - *_packed().
1470  */
1471 static bool packed_used_wrap_counter(u16 last_used_idx)
1472 {
1473 	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1474 }
1475 
1476 static u16 packed_last_used(u16 last_used_idx)
1477 {
1478 	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1479 }
1480 
1481 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1482 				     const struct vring_desc_extra *extra)
1483 {
1484 	u16 flags;
1485 
1486 	flags = extra->flags;
1487 
1488 	if (flags & VRING_DESC_F_INDIRECT) {
1489 		if (!vq->use_map_api)
1490 			return;
1491 	} else if (!vring_need_unmap_buffer(vq, extra))
1492 		return;
1493 
1494 	virtqueue_unmap_page_attrs(&vq->vq,
1495 				   extra->addr, extra->len,
1496 				   (flags & VRING_DESC_F_WRITE) ?
1497 				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
1498 				   0);
1499 }
1500 
1501 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1502 						       gfp_t gfp)
1503 {
1504 	struct vring_desc_extra *extra;
1505 	struct vring_packed_desc *desc;
1506 	int i, size;
1507 
1508 	/*
1509 	 * We require lowmem mappings for the descriptors because
1510 	 * otherwise virt_to_phys will give us bogus addresses in the
1511 	 * virtqueue.
1512 	 */
1513 	gfp &= ~__GFP_HIGHMEM;
1514 
1515 	size = (sizeof(*desc) + sizeof(*extra)) * total_sg;
1516 
1517 	desc = kmalloc(size, gfp);
1518 	if (!desc)
1519 		return NULL;
1520 
1521 	extra = (struct vring_desc_extra *)&desc[total_sg];
1522 
1523 	for (i = 0; i < total_sg; i++)
1524 		extra[i].next = i + 1;
1525 
1526 	return desc;
1527 }
1528 
1529 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1530 					 struct scatterlist *sgs[],
1531 					 unsigned int total_sg,
1532 					 unsigned int out_sgs,
1533 					 unsigned int in_sgs,
1534 					 void *data,
1535 					 bool premapped,
1536 					 gfp_t gfp,
1537 					 u16 id,
1538 					 unsigned long attr)
1539 {
1540 	struct vring_desc_extra *extra;
1541 	struct vring_packed_desc *desc;
1542 	struct scatterlist *sg;
1543 	unsigned int i, n, err_idx, len, total_in_len = 0;
1544 	u16 head;
1545 	dma_addr_t addr;
1546 
1547 	head = vq->packed.next_avail_idx;
1548 	desc = alloc_indirect_packed(total_sg, gfp);
1549 	if (!desc)
1550 		return -ENOMEM;
1551 
1552 	extra = (struct vring_desc_extra *)&desc[total_sg];
1553 
1554 	if (unlikely(vq->vq.num_free < 1)) {
1555 		pr_debug("Can't add buf len 1 - avail = 0\n");
1556 		kfree(desc);
1557 		END_USE(vq);
1558 		return -ENOSPC;
1559 	}
1560 
1561 	i = 0;
1562 
1563 	for (n = 0; n < out_sgs + in_sgs; n++) {
1564 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1565 			if (vring_map_one_sg(vq, sg, n < out_sgs ?
1566 					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
1567 					     &addr, &len, premapped, attr))
1568 				goto unmap_release;
1569 
1570 			desc[i].flags = cpu_to_le16(n < out_sgs ?
1571 						0 : VRING_DESC_F_WRITE);
1572 			desc[i].addr = cpu_to_le64(addr);
1573 			desc[i].len = cpu_to_le32(len);
1574 
1575 			if (unlikely(vq->use_map_api)) {
1576 				extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
1577 				extra[i].len = len;
1578 				extra[i].flags = n < out_sgs ?  0 : VRING_DESC_F_WRITE;
1579 			}
1580 
1581 			if (n >= out_sgs)
1582 				total_in_len += len;
1583 			i++;
1584 		}
1585 	}
1586 
1587 	/* Now that the indirect table is filled in, map it. */
1588 	addr = vring_map_single(vq, desc,
1589 			total_sg * sizeof(struct vring_packed_desc),
1590 			DMA_TO_DEVICE);
1591 	if (vring_mapping_error(vq, addr))
1592 		goto unmap_release;
1593 
1594 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1595 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1596 				sizeof(struct vring_packed_desc));
1597 	vq->packed.vring.desc[head].id = cpu_to_le16(id);
1598 
1599 	if (vq->use_map_api) {
1600 		vq->packed.desc_extra[id].addr = addr;
1601 		vq->packed.desc_extra[id].len = total_sg *
1602 				sizeof(struct vring_packed_desc);
1603 		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1604 						  vq->packed.avail_used_flags;
1605 	}
1606 
1607 	/*
1608 	 * A driver MUST NOT make the first descriptor in the list
1609 	 * available before all subsequent descriptors comprising
1610 	 * the list are made available.
1611 	 */
1612 	virtio_wmb(vq->weak_barriers);
1613 	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1614 						vq->packed.avail_used_flags);
1615 
1616 	/* We're using some buffers from the free list. */
1617 	vq->vq.num_free -= 1;
1618 
1619 	/* Update free pointer */
1620 	n = head + 1;
1621 	if (n >= vq->packed.vring.num) {
1622 		n = 0;
1623 		vq->packed.avail_wrap_counter ^= 1;
1624 		vq->packed.avail_used_flags ^=
1625 				1 << VRING_PACKED_DESC_F_AVAIL |
1626 				1 << VRING_PACKED_DESC_F_USED;
1627 	}
1628 	vq->packed.next_avail_idx = n;
1629 	if (!virtqueue_is_in_order(vq))
1630 		vq->free_head = vq->packed.desc_extra[id].next;
1631 
1632 	/* Store token and indirect buffer state. */
1633 	vq->packed.desc_state[id].num = 1;
1634 	vq->packed.desc_state[id].data = data;
1635 	vq->packed.desc_state[id].indir_desc = desc;
1636 	vq->packed.desc_state[id].last = id;
1637 	vq->packed.desc_state[id].total_in_len = total_in_len;
1638 
1639 	vq->num_added += 1;
1640 
1641 	pr_debug("Added buffer head %i to %p\n", head, vq);
1642 	END_USE(vq);
1643 
1644 	return 0;
1645 
1646 unmap_release:
1647 	err_idx = i;
1648 
1649 	for (i = 0; i < err_idx; i++)
1650 		vring_unmap_extra_packed(vq, &extra[i]);
1651 
1652 	kfree(desc);
1653 
1654 	END_USE(vq);
1655 	return -ENOMEM;
1656 }
1657 
/*
 * Add a buffer to a packed virtqueue, taking the buffer id from the
 * free_head list.
 *
 * @vq:        the virtqueue
 * @sgs:       array of out_sgs + in_sgs scatterlists; the first @out_sgs
 *             are mapped DMA_TO_DEVICE, the remaining ones DMA_FROM_DEVICE
 * @total_sg:  total number of scatterlist entries (must not be 0)
 * @out_sgs:   number of device-readable scatterlists
 * @in_sgs:    number of device-writable scatterlists
 * @data:      token stored for the buffer (must not be NULL)
 * @ctx:       stored in desc_state[].indir_desc on the direct path; must be
 *             NULL when vq->indirect is set
 * @premapped: forwarded to vring_map_one_sg()
 * @gfp:       allocation flags handed to the indirect path
 * @attr:      forwarded to vring_map_one_sg()
 *
 * Returns 0 on success, -EIO if the queue is broken or a mapping fails on
 * the direct path, -ENOSPC if fewer than @total_sg descriptors are free,
 * or whatever virtqueue_add_indirect_packed() returned other than -ENOMEM.
 */
static inline int virtqueue_add_packed(struct vring_virtqueue *vq,
				       struct scatterlist *sgs[],
				       unsigned int total_sg,
				       unsigned int out_sgs,
				       unsigned int in_sgs,
				       void *data,
				       void *ctx,
				       bool premapped,
				       gfp_t gfp,
				       unsigned long attr)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, c, descs_used, err_idx, len;
	__le16 head_flags, flags;
	u16 head, id, prev, curr, avail_used_flags;
	int err;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(vq, total_sg)) {
		id = vq->free_head;
		BUG_ON(id == vq->packed.vring.num);
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, premapped, gfp,
						    id, attr);
		/* Only -ENOMEM is retried with direct descriptors. */
		if (err != -ENOMEM) {
			END_USE(vq);
			return err;
		}

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	/* Saved so the error path can undo a wrap that happened mid-way. */
	avail_used_flags = vq->packed.avail_used_flags;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;
	descs_used = total_sg;

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	/* i walks ring slots, curr walks the desc_extra free list. */
	curr = id;
	c = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;

			if (vring_map_one_sg(vq, sg, n < out_sgs ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
					     &addr, &len, premapped, attr))
				goto unmap_release;

			/* Every descriptor but the last carries F_NEXT. */
			flags = cpu_to_le16(vq->packed.avail_used_flags |
				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
			/* The head's flags are held back until the very end. */
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(len);
			desc[i].id = cpu_to_le16(id);

			if (unlikely(vq->use_map_api)) {
				vq->packed.desc_extra[curr].addr = premapped ?
					DMA_MAPPING_ERROR : addr;
				vq->packed.desc_extra[curr].len = len;
				vq->packed.desc_extra[curr].flags =
					le16_to_cpu(flags);
			}
			prev = curr;
			curr = vq->packed.desc_extra[curr].next;

			/* Wrapped past the end of the ring: flip avail/used bits. */
			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
			}
		}
	}

	/* i only ends up at or below head if the slot index wrapped. */
	if (i <= head)
		vq->packed.avail_wrap_counter ^= 1;

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;
	vq->free_head = curr;

	/* Store token. */
	vq->packed.desc_state[id].num = descs_used;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = ctx;
	vq->packed.desc_state[id].last = prev;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += descs_used;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	/* Replay the walk from head and unmap what was mapped before the failure. */
	err_idx = i;
	i = head;
	curr = vq->free_head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
		curr = vq->packed.desc_extra[curr].next;
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}
1815 
/*
 * Add a buffer to a packed virtqueue that is used in order.
 *
 * Unlike virtqueue_add_packed(), the buffer id is the ring slot of the
 * head descriptor and desc_extra[] is indexed by ring slot; the free_head
 * list is not consulted here.
 *
 * @vq:        the virtqueue
 * @sgs:       array of out_sgs + in_sgs scatterlists; the first @out_sgs
 *             are mapped DMA_TO_DEVICE, the remaining ones DMA_FROM_DEVICE
 * @total_sg:  total number of scatterlist entries (must not be 0)
 * @out_sgs:   number of device-readable scatterlists
 * @in_sgs:    number of device-writable scatterlists
 * @data:      token stored for the buffer (must not be NULL)
 * @ctx:       stored in desc_state[].indir_desc on the direct path; must be
 *             NULL when vq->indirect is set
 * @premapped: forwarded to vring_map_one_sg()
 * @gfp:       allocation flags handed to the indirect path
 * @attr:      forwarded to vring_map_one_sg()
 *
 * Returns 0 on success, -EIO if the queue is broken or a mapping fails on
 * the direct path, -ENOSPC if fewer than @total_sg descriptors are free,
 * or whatever virtqueue_add_indirect_packed() returned other than -ENOMEM.
 */
static inline int virtqueue_add_packed_in_order(struct vring_virtqueue *vq,
						struct scatterlist *sgs[],
						unsigned int total_sg,
						unsigned int out_sgs,
						unsigned int in_sgs,
						void *data,
						void *ctx,
						bool premapped,
						gfp_t gfp,
						unsigned long attr)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, sg_count, err_idx, total_in_len = 0;
	__le16 head_flags, flags;
	u16 head, avail_used_flags;
	bool avail_wrap_counter;
	int err;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(vq, total_sg)) {
		/* The next avail slot doubles as the buffer id. */
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, premapped, gfp,
						    vq->packed.next_avail_idx,
						    attr);
		/* Only -ENOMEM is retried with direct descriptors. */
		if (err != -ENOMEM) {
			END_USE(vq);
			return err;
		}

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	/* Saved so the error path can undo a wrap that happened mid-way. */
	avail_used_flags = vq->packed.avail_used_flags;
	avail_wrap_counter = vq->packed.avail_wrap_counter;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;

	if (unlikely(vq->vq.num_free < total_sg)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 total_sg, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	sg_count = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;

			/* Every descriptor but the last carries F_NEXT. */
			flags = 0;
			if (++sg_count != total_sg)
				flags |= cpu_to_le16(VRING_DESC_F_NEXT);
			if (n >= out_sgs)
				flags |= cpu_to_le16(VRING_DESC_F_WRITE);

			if (vring_map_one_sg(vq, sg, n < out_sgs ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
					     &addr, &len, premapped, attr))
				goto unmap_release;

			flags |= cpu_to_le16(vq->packed.avail_used_flags);

			/* The head's flags are held back until the very end. */
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(len);
			desc[i].id = cpu_to_le16(head);

			if (unlikely(vq->use_map_api)) {
				vq->packed.desc_extra[i].addr = premapped ?
				      DMA_MAPPING_ERROR : addr;
				vq->packed.desc_extra[i].len = len;
				vq->packed.desc_extra[i].flags =
					le16_to_cpu(flags);
			}

			/* Wrapped past the end of the ring: flip the wrap state. */
			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
				vq->packed.avail_wrap_counter ^= 1;
			}

			/* Sum of the device-writable lengths, stored with the token. */
			if (n >= out_sgs)
				total_in_len += len;
		}
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= total_sg;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;

	/* Store token. */
	vq->packed.desc_state[head].num = total_sg;
	vq->packed.desc_state[head].data = data;
	vq->packed.desc_state[head].indir_desc = ctx;
	vq->packed.desc_state[head].total_in_len = total_in_len;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += total_sg;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	/* Restore the wrap state and unmap the slots mapped before the failure. */
	err_idx = i;
	i = head;
	vq->packed.avail_used_flags = avail_used_flags;
	vq->packed.avail_wrap_counter = avail_wrap_counter;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[i]);
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}
1971 
1972 static bool virtqueue_kick_prepare_packed(struct vring_virtqueue *vq)
1973 {
1974 	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1975 	bool needs_kick;
1976 	union {
1977 		struct {
1978 			__le16 off_wrap;
1979 			__le16 flags;
1980 		};
1981 		u32 u32;
1982 	} snapshot;
1983 
1984 	START_USE(vq);
1985 
1986 	/*
1987 	 * We need to expose the new flags value before checking notification
1988 	 * suppressions.
1989 	 */
1990 	virtio_mb(vq->weak_barriers);
1991 
1992 	old = vq->packed.next_avail_idx - vq->num_added;
1993 	new = vq->packed.next_avail_idx;
1994 	vq->num_added = 0;
1995 
1996 	snapshot.u32 = *(u32 *)vq->packed.vring.device;
1997 	flags = le16_to_cpu(snapshot.flags);
1998 
1999 	LAST_ADD_TIME_CHECK(vq);
2000 	LAST_ADD_TIME_INVALID(vq);
2001 
2002 	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
2003 		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
2004 		goto out;
2005 	}
2006 
2007 	off_wrap = le16_to_cpu(snapshot.off_wrap);
2008 
2009 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
2010 	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
2011 	if (wrap_counter != vq->packed.avail_wrap_counter)
2012 		event_idx -= vq->packed.vring.num;
2013 
2014 	needs_kick = vring_need_event(event_idx, new, old);
2015 out:
2016 	END_USE(vq);
2017 	return needs_kick;
2018 }
2019 
2020 static void detach_buf_packed_in_order(struct vring_virtqueue *vq,
2021 				       unsigned int id, void **ctx)
2022 {
2023 	struct vring_desc_state_packed *state = NULL;
2024 	struct vring_packed_desc *desc;
2025 	unsigned int i, curr;
2026 
2027 	state = &vq->packed.desc_state[id];
2028 
2029 	/* Clear data ptr. */
2030 	state->data = NULL;
2031 
2032 	vq->vq.num_free += state->num;
2033 
2034 	if (unlikely(vq->use_map_api)) {
2035 		curr = id;
2036 		for (i = 0; i < state->num; i++) {
2037 			vring_unmap_extra_packed(vq,
2038 						 &vq->packed.desc_extra[curr]);
2039 			curr = vq->packed.desc_extra[curr].next;
2040 		}
2041 	}
2042 
2043 	if (vq->indirect) {
2044 		struct vring_desc_extra *extra;
2045 		u32 len, num;
2046 
2047 		/* Free the indirect table, if any, now that it's unmapped. */
2048 		desc = state->indir_desc;
2049 		if (!desc)
2050 			return;
2051 
2052 		if (vq->use_map_api) {
2053 			len = vq->packed.desc_extra[id].len;
2054 			num = len / sizeof(struct vring_packed_desc);
2055 
2056 			extra = (struct vring_desc_extra *)&desc[num];
2057 
2058 			for (i = 0; i < num; i++)
2059 				vring_unmap_extra_packed(vq, &extra[i]);
2060 		}
2061 		kfree(desc);
2062 		state->indir_desc = NULL;
2063 	} else if (ctx) {
2064 		*ctx = state->indir_desc;
2065 	}
2066 }
2067 
2068 static void detach_buf_packed(struct vring_virtqueue *vq,
2069 			      unsigned int id, void **ctx)
2070 {
2071 	struct vring_desc_state_packed *state = &vq->packed.desc_state[id];
2072 
2073 	vq->packed.desc_extra[state->last].next = vq->free_head;
2074 	vq->free_head = id;
2075 
2076 	detach_buf_packed_in_order(vq, id, ctx);
2077 }
2078 
2079 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
2080 				       u16 idx, bool used_wrap_counter)
2081 {
2082 	u16 flags;
2083 	bool avail, used;
2084 
2085 	flags = vring_read_packed_desc_flags(vq, idx);
2086 	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
2087 	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
2088 
2089 	return avail == used && used == used_wrap_counter;
2090 }
2091 
2092 static bool virtqueue_poll_packed(const struct vring_virtqueue *vq,
2093 				  unsigned int off_wrap)
2094 {
2095 	bool wrap_counter;
2096 	u16 used_idx;
2097 
2098 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
2099 	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
2100 
2101 	return is_used_desc_packed(vq, used_idx, wrap_counter);
2102 }
2103 
2104 static bool more_used_packed(const struct vring_virtqueue *vq)
2105 {
2106 	return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx));
2107 }
2108 
2109 static void update_last_used_idx_packed(struct vring_virtqueue *vq,
2110 					u16 id, u16 last_used,
2111 					u16 used_wrap_counter)
2112 {
2113 	last_used += vq->packed.desc_state[id].num;
2114 	if (unlikely(last_used >= vq->packed.vring.num)) {
2115 		last_used -= vq->packed.vring.num;
2116 		used_wrap_counter ^= 1;
2117 	}
2118 
2119 	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
2120 	WRITE_ONCE(vq->last_used_idx, last_used);
2121 
2122 	/*
2123 	 * If we expect an interrupt for the next entry, tell host
2124 	 * by writing event index and flush out the write before
2125 	 * the read in the next get_buf call.
2126 	 */
2127 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
2128 		virtio_store_mb(vq->weak_barriers,
2129 				&vq->packed.vring.driver->off_wrap,
2130 				cpu_to_le16(vq->last_used_idx));
2131 }
2132 
2133 static bool more_used_packed_in_order(const struct vring_virtqueue *vq)
2134 {
2135 	if (vq->batch_last.id != UINT_MAX)
2136 		return true;
2137 
2138 	return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx));
2139 }
2140 
2141 static void *virtqueue_get_buf_ctx_packed_in_order(struct vring_virtqueue *vq,
2142 						   unsigned int *len,
2143 						   void **ctx)
2144 {
2145 	unsigned int num = vq->packed.vring.num;
2146 	u16 last_used, last_used_idx;
2147 	bool used_wrap_counter;
2148 	void *ret;
2149 
2150 	START_USE(vq);
2151 
2152 	if (unlikely(vq->broken)) {
2153 		END_USE(vq);
2154 		return NULL;
2155 	}
2156 
2157 	last_used_idx = vq->last_used_idx;
2158 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
2159 	last_used = packed_last_used(last_used_idx);
2160 
2161 	if (vq->batch_last.id == UINT_MAX) {
2162 		if (!more_used_packed_in_order(vq)) {
2163 			pr_debug("No more buffers in queue\n");
2164 			END_USE(vq);
2165 			return NULL;
2166 		}
2167 		/* Only get used elements after they have been exposed by host. */
2168 		virtio_rmb(vq->weak_barriers);
2169 		vq->batch_last.id =
2170 			le16_to_cpu(vq->packed.vring.desc[last_used].id);
2171 		vq->batch_last.len =
2172 			le32_to_cpu(vq->packed.vring.desc[last_used].len);
2173 	}
2174 
2175 	if (vq->batch_last.id == last_used) {
2176 		vq->batch_last.id = UINT_MAX;
2177 		*len = vq->batch_last.len;
2178 	} else {
2179 		*len = vq->packed.desc_state[last_used].total_in_len;
2180 	}
2181 
2182 	if (unlikely(last_used >= num)) {
2183 		BAD_RING(vq, "id %u out of range\n", last_used);
2184 		return NULL;
2185 	}
2186 	if (unlikely(!vq->packed.desc_state[last_used].data)) {
2187 		BAD_RING(vq, "id %u is not a head!\n", last_used);
2188 		return NULL;
2189 	}
2190 
2191 	/* detach_buf_packed clears data, so grab it now. */
2192 	ret = vq->packed.desc_state[last_used].data;
2193 	detach_buf_packed_in_order(vq, last_used, ctx);
2194 
2195 	update_last_used_idx_packed(vq, last_used, last_used,
2196 				    used_wrap_counter);
2197 
2198 	LAST_ADD_TIME_INVALID(vq);
2199 
2200 	END_USE(vq);
2201 	return ret;
2202 }
2203 
2204 static void *virtqueue_get_buf_ctx_packed(struct vring_virtqueue *vq,
2205 					  unsigned int *len,
2206 					  void **ctx)
2207 {
2208 	unsigned int num = vq->packed.vring.num;
2209 	u16 last_used, id, last_used_idx;
2210 	bool used_wrap_counter;
2211 	void *ret;
2212 
2213 	START_USE(vq);
2214 
2215 	if (unlikely(vq->broken)) {
2216 		END_USE(vq);
2217 		return NULL;
2218 	}
2219 
2220 	if (!more_used_packed(vq)) {
2221 		pr_debug("No more buffers in queue\n");
2222 		END_USE(vq);
2223 		return NULL;
2224 	}
2225 
2226 	/* Only get used elements after they have been exposed by host. */
2227 	virtio_rmb(vq->weak_barriers);
2228 
2229 	last_used_idx = READ_ONCE(vq->last_used_idx);
2230 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
2231 	last_used = packed_last_used(last_used_idx);
2232 	id = vring_read_packed_desc_id(vq, last_used);
2233 	*len = vring_read_packed_desc_len(vq, last_used);
2234 
2235 	if (unlikely(id >= num)) {
2236 		BAD_RING(vq, "id %u out of range\n", id);
2237 		return NULL;
2238 	}
2239 	if (unlikely(!vq->packed.desc_state[id].data)) {
2240 		BAD_RING(vq, "id %u is not a head!\n", id);
2241 		return NULL;
2242 	}
2243 
2244 	/* detach_buf_packed clears data, so grab it now. */
2245 	ret = vq->packed.desc_state[id].data;
2246 	detach_buf_packed(vq, id, ctx);
2247 
2248 	update_last_used_idx_packed(vq, id, last_used, used_wrap_counter);
2249 
2250 	LAST_ADD_TIME_INVALID(vq);
2251 
2252 	END_USE(vq);
2253 	return ret;
2254 }
2255 
2256 static void virtqueue_disable_cb_packed(struct vring_virtqueue *vq)
2257 {
2258 	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
2259 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2260 
2261 		/*
2262 		 * If device triggered an event already it won't trigger one again:
2263 		 * no need to disable.
2264 		 */
2265 		if (vq->event_triggered)
2266 			return;
2267 
2268 		vq->packed.vring.driver->flags =
2269 			cpu_to_le16(vq->packed.event_flags_shadow);
2270 	}
2271 }
2272 
2273 static unsigned int virtqueue_enable_cb_prepare_packed(struct vring_virtqueue *vq)
2274 {
2275 	START_USE(vq);
2276 
2277 	/*
2278 	 * We optimistically turn back on interrupts, then check if there was
2279 	 * more to do.
2280 	 */
2281 
2282 	if (vq->event) {
2283 		vq->packed.vring.driver->off_wrap =
2284 			cpu_to_le16(vq->last_used_idx);
2285 		/*
2286 		 * We need to update event offset and event wrap
2287 		 * counter first before updating event flags.
2288 		 */
2289 		virtio_wmb(vq->weak_barriers);
2290 	}
2291 
2292 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
2293 		vq->packed.event_flags_shadow = vq->event ?
2294 				VRING_PACKED_EVENT_FLAG_DESC :
2295 				VRING_PACKED_EVENT_FLAG_ENABLE;
2296 		vq->packed.vring.driver->flags =
2297 				cpu_to_le16(vq->packed.event_flags_shadow);
2298 	}
2299 
2300 	END_USE(vq);
2301 	return vq->last_used_idx;
2302 }
2303 
2304 static bool virtqueue_enable_cb_delayed_packed(struct vring_virtqueue *vq)
2305 {
2306 	u16 used_idx, wrap_counter, last_used_idx;
2307 	u16 bufs;
2308 
2309 	START_USE(vq);
2310 
2311 	/*
2312 	 * We optimistically turn back on interrupts, then check if there was
2313 	 * more to do.
2314 	 */
2315 
2316 	if (vq->event) {
2317 		/* TODO: tune this threshold */
2318 		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
2319 		last_used_idx = READ_ONCE(vq->last_used_idx);
2320 		wrap_counter = packed_used_wrap_counter(last_used_idx);
2321 
2322 		used_idx = packed_last_used(last_used_idx) + bufs;
2323 		if (used_idx >= vq->packed.vring.num) {
2324 			used_idx -= vq->packed.vring.num;
2325 			wrap_counter ^= 1;
2326 		}
2327 
2328 		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
2329 			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
2330 
2331 		/*
2332 		 * We need to update event offset and event wrap
2333 		 * counter first before updating event flags.
2334 		 */
2335 		virtio_wmb(vq->weak_barriers);
2336 	}
2337 
2338 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
2339 		vq->packed.event_flags_shadow = vq->event ?
2340 				VRING_PACKED_EVENT_FLAG_DESC :
2341 				VRING_PACKED_EVENT_FLAG_ENABLE;
2342 		vq->packed.vring.driver->flags =
2343 				cpu_to_le16(vq->packed.event_flags_shadow);
2344 	}
2345 
2346 	/*
2347 	 * We need to update event suppression structure first
2348 	 * before re-checking for more used buffers.
2349 	 */
2350 	virtio_mb(vq->weak_barriers);
2351 
2352 	last_used_idx = READ_ONCE(vq->last_used_idx);
2353 	wrap_counter = packed_used_wrap_counter(last_used_idx);
2354 	used_idx = packed_last_used(last_used_idx);
2355 	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
2356 		END_USE(vq);
2357 		return false;
2358 	}
2359 
2360 	END_USE(vq);
2361 	return true;
2362 }
2363 
2364 static void *virtqueue_detach_unused_buf_packed(struct vring_virtqueue *vq)
2365 {
2366 	unsigned int i;
2367 	void *buf;
2368 
2369 	START_USE(vq);
2370 
2371 	for (i = 0; i < vq->packed.vring.num; i++) {
2372 		if (!vq->packed.desc_state[i].data)
2373 			continue;
2374 		/* detach_buf clears data, so grab it now. */
2375 		buf = vq->packed.desc_state[i].data;
2376 		if (virtqueue_is_in_order(vq))
2377 			detach_buf_packed_in_order(vq, i, NULL);
2378 		else
2379 			detach_buf_packed(vq, i, NULL);
2380 		END_USE(vq);
2381 		return buf;
2382 	}
2383 	/* That should have freed everything. */
2384 	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
2385 
2386 	END_USE(vq);
2387 	return NULL;
2388 }
2389 
2390 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
2391 {
2392 	struct vring_desc_extra *desc_extra;
2393 	unsigned int i;
2394 
2395 	desc_extra = kmalloc_objs(struct vring_desc_extra, num);
2396 	if (!desc_extra)
2397 		return NULL;
2398 
2399 	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
2400 
2401 	for (i = 0; i < num - 1; i++)
2402 		desc_extra[i].next = i + 1;
2403 
2404 	desc_extra[num - 1].next = 0;
2405 
2406 	return desc_extra;
2407 }
2408 
2409 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
2410 			      struct virtio_device *vdev,
2411 			      union virtio_map map)
2412 {
2413 	if (vring_packed->vring.desc)
2414 		vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
2415 				 vring_packed->vring.desc,
2416 				 vring_packed->ring_dma_addr,
2417 				 map);
2418 
2419 	if (vring_packed->vring.driver)
2420 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
2421 				 vring_packed->vring.driver,
2422 				 vring_packed->driver_event_dma_addr,
2423 				 map);
2424 
2425 	if (vring_packed->vring.device)
2426 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
2427 				 vring_packed->vring.device,
2428 				 vring_packed->device_event_dma_addr,
2429 				 map);
2430 
2431 	kfree(vring_packed->desc_state);
2432 	kfree(vring_packed->desc_extra);
2433 }
2434 
2435 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
2436 				    struct virtio_device *vdev,
2437 				    u32 num, union virtio_map map)
2438 {
2439 	struct vring_packed_desc *ring;
2440 	struct vring_packed_desc_event *driver, *device;
2441 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
2442 	size_t ring_size_in_bytes, event_size_in_bytes;
2443 
2444 	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
2445 
2446 	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
2447 				 &ring_dma_addr,
2448 				 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2449 				 map);
2450 	if (!ring)
2451 		goto err;
2452 
2453 	vring_packed->vring.desc         = ring;
2454 	vring_packed->ring_dma_addr      = ring_dma_addr;
2455 	vring_packed->ring_size_in_bytes = ring_size_in_bytes;
2456 
2457 	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
2458 
2459 	driver = vring_alloc_queue(vdev, event_size_in_bytes,
2460 				   &driver_event_dma_addr,
2461 				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2462 				   map);
2463 	if (!driver)
2464 		goto err;
2465 
2466 	vring_packed->vring.driver          = driver;
2467 	vring_packed->event_size_in_bytes   = event_size_in_bytes;
2468 	vring_packed->driver_event_dma_addr = driver_event_dma_addr;
2469 
2470 	device = vring_alloc_queue(vdev, event_size_in_bytes,
2471 				   &device_event_dma_addr,
2472 				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2473 				   map);
2474 	if (!device)
2475 		goto err;
2476 
2477 	vring_packed->vring.device          = device;
2478 	vring_packed->device_event_dma_addr = device_event_dma_addr;
2479 
2480 	vring_packed->vring.num = num;
2481 
2482 	return 0;
2483 
2484 err:
2485 	vring_free_packed(vring_packed, vdev, map);
2486 	return -ENOMEM;
2487 }
2488 
2489 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
2490 {
2491 	struct vring_desc_state_packed *state;
2492 	struct vring_desc_extra *extra;
2493 	u32 num = vring_packed->vring.num;
2494 
2495 	state = kmalloc_objs(struct vring_desc_state_packed, num);
2496 	if (!state)
2497 		goto err_desc_state;
2498 
2499 	memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2500 
2501 	extra = vring_alloc_desc_extra(num);
2502 	if (!extra)
2503 		goto err_desc_extra;
2504 
2505 	vring_packed->desc_state = state;
2506 	vring_packed->desc_extra = extra;
2507 
2508 	return 0;
2509 
2510 err_desc_extra:
2511 	kfree(state);
2512 err_desc_state:
2513 	return -ENOMEM;
2514 }
2515 
2516 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2517 					bool callback)
2518 {
2519 	vring_packed->next_avail_idx = 0;
2520 	vring_packed->avail_wrap_counter = 1;
2521 	vring_packed->event_flags_shadow = 0;
2522 	vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2523 
2524 	/* No callback?  Tell other side not to bother us. */
2525 	if (!callback) {
2526 		vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2527 		vring_packed->vring.driver->flags =
2528 			cpu_to_le16(vring_packed->event_flags_shadow);
2529 	}
2530 }
2531 
2532 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2533 					  struct vring_virtqueue_packed *vring_packed)
2534 {
2535 	vq->packed = *vring_packed;
2536 
2537 	if (virtqueue_is_in_order(vq)) {
2538 		vq->batch_last.id = UINT_MAX;
2539 	} else {
2540 		/*
2541 		 * Put everything in free lists. Note that
2542 		 * next_avail_idx is sufficient with IN_ORDER so
2543 		 * free_head is unused.
2544 		 */
2545 		vq->free_head = 0;
2546 	}
2547 }
2548 static void virtqueue_reset_packed(struct vring_virtqueue *vq)
2549 {
2550 	memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2551 	memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2552 
2553 	/* we need to reset the desc.flags. For more, see is_used_desc_packed() */
2554 	memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2555 	virtqueue_init(vq, vq->packed.vring.num);
2556 	virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2557 }
2558 
2559 static const struct virtqueue_ops packed_ops;
2560 
2561 static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
2562 					       struct vring_virtqueue_packed *vring_packed,
2563 					       struct virtio_device *vdev,
2564 					       bool weak_barriers,
2565 					       bool context,
2566 					       bool (*notify)(struct virtqueue *),
2567 					       void (*callback)(struct virtqueue *),
2568 					       const char *name,
2569 					       union virtio_map map)
2570 {
2571 	struct vring_virtqueue *vq;
2572 	int err;
2573 
2574 	vq = kmalloc_obj(*vq);
2575 	if (!vq)
2576 		return NULL;
2577 
2578 	vq->vq.callback = callback;
2579 	vq->vq.vdev = vdev;
2580 	vq->vq.name = name;
2581 	vq->vq.index = index;
2582 	vq->vq.reset = false;
2583 	vq->we_own_ring = false;
2584 	vq->notify = notify;
2585 	vq->weak_barriers = weak_barriers;
2586 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2587 	vq->broken = true;
2588 #else
2589 	vq->broken = false;
2590 #endif
2591 	vq->map = map;
2592 	vq->use_map_api = vring_use_map_api(vdev);
2593 
2594 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2595 		!context;
2596 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2597 	vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ?
2598 		     VQ_LAYOUT_PACKED_IN_ORDER : VQ_LAYOUT_PACKED;
2599 
2600 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2601 		vq->weak_barriers = false;
2602 
2603 	err = vring_alloc_state_extra_packed(vring_packed);
2604 	if (err) {
2605 		kfree(vq);
2606 		return NULL;
2607 	}
2608 
2609 	virtqueue_vring_init_packed(vring_packed, !!callback);
2610 
2611 	virtqueue_init(vq, vring_packed->vring.num);
2612 	virtqueue_vring_attach_packed(vq, vring_packed);
2613 
2614 	spin_lock(&vdev->vqs_list_lock);
2615 	list_add_tail(&vq->vq.list, &vdev->vqs);
2616 	spin_unlock(&vdev->vqs_list_lock);
2617 	return &vq->vq;
2618 }
2619 
2620 static struct virtqueue *vring_create_virtqueue_packed(
2621 	unsigned int index,
2622 	unsigned int num,
2623 	unsigned int vring_align,
2624 	struct virtio_device *vdev,
2625 	bool weak_barriers,
2626 	bool may_reduce_num,
2627 	bool context,
2628 	bool (*notify)(struct virtqueue *),
2629 	void (*callback)(struct virtqueue *),
2630 	const char *name,
2631 	union virtio_map map)
2632 {
2633 	struct vring_virtqueue_packed vring_packed = {};
2634 	struct virtqueue *vq;
2635 
2636 	if (vring_alloc_queue_packed(&vring_packed, vdev, num, map))
2637 		return NULL;
2638 
2639 	vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers,
2640 					context, notify, callback, name, map);
2641 	if (!vq) {
2642 		vring_free_packed(&vring_packed, vdev, map);
2643 		return NULL;
2644 	}
2645 
2646 	to_vvq(vq)->we_own_ring = true;
2647 
2648 	return vq;
2649 }
2650 
2651 static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num)
2652 {
2653 	struct vring_virtqueue_packed vring_packed = {};
2654 	struct virtio_device *vdev = vq->vq.vdev;
2655 	int err;
2656 
2657 	if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map))
2658 		goto err_ring;
2659 
2660 	err = vring_alloc_state_extra_packed(&vring_packed);
2661 	if (err)
2662 		goto err_state_extra;
2663 
2664 	vring_free(&vq->vq);
2665 
2666 	virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2667 
2668 	virtqueue_init(vq, vring_packed.vring.num);
2669 	virtqueue_vring_attach_packed(vq, &vring_packed);
2670 
2671 	return 0;
2672 
2673 err_state_extra:
2674 	vring_free_packed(&vring_packed, vdev, vq->map);
2675 err_ring:
2676 	virtqueue_reset_packed(vq);
2677 	return -ENOMEM;
2678 }
2679 
2680 static const struct virtqueue_ops split_ops = {
2681 	.add = virtqueue_add_split,
2682 	.get = virtqueue_get_buf_ctx_split,
2683 	.kick_prepare = virtqueue_kick_prepare_split,
2684 	.disable_cb = virtqueue_disable_cb_split,
2685 	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
2686 	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
2687 	.poll = virtqueue_poll_split,
2688 	.detach_unused_buf = virtqueue_detach_unused_buf_split,
2689 	.more_used = more_used_split,
2690 	.resize = virtqueue_resize_split,
2691 	.reset = virtqueue_reset_split,
2692 };
2693 
2694 static const struct virtqueue_ops packed_ops = {
2695 	.add = virtqueue_add_packed,
2696 	.get = virtqueue_get_buf_ctx_packed,
2697 	.kick_prepare = virtqueue_kick_prepare_packed,
2698 	.disable_cb = virtqueue_disable_cb_packed,
2699 	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
2700 	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
2701 	.poll = virtqueue_poll_packed,
2702 	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
2703 	.more_used = more_used_packed,
2704 	.resize = virtqueue_resize_packed,
2705 	.reset = virtqueue_reset_packed,
2706 };
2707 
2708 static const struct virtqueue_ops split_in_order_ops = {
2709 	.add = virtqueue_add_split,
2710 	.get = virtqueue_get_buf_ctx_split_in_order,
2711 	.kick_prepare = virtqueue_kick_prepare_split,
2712 	.disable_cb = virtqueue_disable_cb_split,
2713 	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
2714 	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
2715 	.poll = virtqueue_poll_split,
2716 	.detach_unused_buf = virtqueue_detach_unused_buf_split,
2717 	.more_used = more_used_split_in_order,
2718 	.resize = virtqueue_resize_split,
2719 	.reset = virtqueue_reset_split,
2720 };
2721 
2722 static const struct virtqueue_ops packed_in_order_ops = {
2723 	.add = virtqueue_add_packed_in_order,
2724 	.get = virtqueue_get_buf_ctx_packed_in_order,
2725 	.kick_prepare = virtqueue_kick_prepare_packed,
2726 	.disable_cb = virtqueue_disable_cb_packed,
2727 	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
2728 	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
2729 	.poll = virtqueue_poll_packed,
2730 	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
2731 	.more_used = more_used_packed_in_order,
2732 	.resize = virtqueue_resize_packed,
2733 	.reset = virtqueue_reset_packed,
2734 };
2735 
2736 static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2737 					 void (*recycle)(struct virtqueue *vq, void *buf))
2738 {
2739 	struct vring_virtqueue *vq = to_vvq(_vq);
2740 	struct virtio_device *vdev = vq->vq.vdev;
2741 	void *buf;
2742 	int err;
2743 
2744 	if (!vq->we_own_ring)
2745 		return -EPERM;
2746 
2747 	if (!vdev->config->disable_vq_and_reset)
2748 		return -ENOENT;
2749 
2750 	if (!vdev->config->enable_vq_after_reset)
2751 		return -ENOENT;
2752 
2753 	err = vdev->config->disable_vq_and_reset(_vq);
2754 	if (err)
2755 		return err;
2756 
2757 	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2758 		recycle(_vq, buf);
2759 
2760 	return 0;
2761 }
2762 
2763 static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2764 {
2765 	struct vring_virtqueue *vq = to_vvq(_vq);
2766 	struct virtio_device *vdev = vq->vq.vdev;
2767 
2768 	if (vdev->config->enable_vq_after_reset(_vq))
2769 		return -EBUSY;
2770 
2771 	return 0;
2772 }
2773 
2774 /*
2775  * Generic functions and exported symbols.
2776  */
2777 
/*
 * Dispatch operation @op to the ops table matching the layout of @vq and
 * yield the operation's return value.
 *
 * @vq is evaluated exactly once: it is copied into __VIRTQUEUE_CALL_vq and
 * every case must use that copy rather than the macro argument itself.
 * An unknown layout hits BUG().
 */
#define VIRTQUEUE_CALL(vq, op, ...)					\
	({								\
	typeof(vq) __VIRTQUEUE_CALL_vq = (vq);				\
	typeof(split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__)) ret;	\
									\
	switch (__VIRTQUEUE_CALL_vq->layout) {				\
	case VQ_LAYOUT_SPLIT:						\
		ret = split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);	\
		break;							\
	case VQ_LAYOUT_PACKED:						\
		ret = packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);\
		break;							\
	case VQ_LAYOUT_SPLIT_IN_ORDER:					\
		ret = split_in_order_ops.op(__VIRTQUEUE_CALL_vq,	\
					    ##__VA_ARGS__);		\
		break;							\
	case VQ_LAYOUT_PACKED_IN_ORDER:					\
		ret = packed_in_order_ops.op(__VIRTQUEUE_CALL_vq,	\
					     ##__VA_ARGS__);		\
		break;							\
	default:							\
		BUG();							\
		break;							\
	}								\
	ret;								\
})
2802 
/*
 * Same dispatch as VIRTQUEUE_CALL(), for operations that return nothing.
 *
 * @vq is evaluated exactly once: it is copied into __VIRTQUEUE_CALL_vq and
 * every case must use that copy rather than the macro argument itself.
 * An unknown layout hits BUG().
 */
#define VOID_VIRTQUEUE_CALL(vq, op, ...)				\
	({								\
	typeof(vq) __VIRTQUEUE_CALL_vq = (vq);				\
									\
	switch (__VIRTQUEUE_CALL_vq->layout) {				\
	case VQ_LAYOUT_SPLIT:						\
		split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);	\
		break;							\
	case VQ_LAYOUT_PACKED:						\
		packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);	\
		break;							\
	case VQ_LAYOUT_SPLIT_IN_ORDER:					\
		split_in_order_ops.op(__VIRTQUEUE_CALL_vq,		\
				      ##__VA_ARGS__);			\
		break;							\
	case VQ_LAYOUT_PACKED_IN_ORDER:					\
		packed_in_order_ops.op(__VIRTQUEUE_CALL_vq,		\
				       ##__VA_ARGS__);			\
		break;							\
	default:							\
		BUG();							\
		break;							\
	}								\
})
2825 
2826 static inline int virtqueue_add(struct virtqueue *_vq,
2827 				struct scatterlist *sgs[],
2828 				unsigned int total_sg,
2829 				unsigned int out_sgs,
2830 				unsigned int in_sgs,
2831 				void *data,
2832 				void *ctx,
2833 				bool premapped,
2834 				gfp_t gfp,
2835 				unsigned long attr)
2836 {
2837 	struct vring_virtqueue *vq = to_vvq(_vq);
2838 
2839 	return VIRTQUEUE_CALL(vq, add, sgs, total_sg,
2840 			      out_sgs, in_sgs, data,
2841 			      ctx, premapped, gfp, attr);
2842 }
2843 
2844 /**
2845  * virtqueue_add_sgs - expose buffers to other end
2846  * @_vq: the struct virtqueue we're talking about.
2847  * @sgs: array of terminated scatterlists.
2848  * @out_sgs: the number of scatterlists readable by other side
2849  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2850  * @data: the token identifying the buffer.
2851  * @gfp: how to do memory allocations (if necessary).
2852  *
2853  * Caller must ensure we don't call this with other virtqueue operations
2854  * at the same time (except where noted).
2855  *
2856  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2857  *
2858  * NB: ENOSPC is a special code that is only returned on an attempt to add a
2859  * buffer to a full VQ. It indicates that some buffers are outstanding and that
2860  * the operation can be retried after some buffers have been used.
2861  */
2862 int virtqueue_add_sgs(struct virtqueue *_vq,
2863 		      struct scatterlist *sgs[],
2864 		      unsigned int out_sgs,
2865 		      unsigned int in_sgs,
2866 		      void *data,
2867 		      gfp_t gfp)
2868 {
2869 	unsigned int i, total_sg = 0;
2870 
2871 	/* Count them first. */
2872 	for (i = 0; i < out_sgs + in_sgs; i++) {
2873 		struct scatterlist *sg;
2874 
2875 		for (sg = sgs[i]; sg; sg = sg_next(sg))
2876 			total_sg++;
2877 	}
2878 	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2879 			     data, NULL, false, gfp, 0);
2880 }
2881 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2882 
2883 /**
2884  * virtqueue_add_outbuf - expose output buffers to other end
2885  * @vq: the struct virtqueue we're talking about.
2886  * @sg: scatterlist (must be well-formed and terminated!)
2887  * @num: the number of entries in @sg readable by other side
2888  * @data: the token identifying the buffer.
2889  * @gfp: how to do memory allocations (if necessary).
2890  *
2891  * Caller must ensure we don't call this with other virtqueue operations
2892  * at the same time (except where noted).
2893  *
2894  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2895  */
2896 int virtqueue_add_outbuf(struct virtqueue *vq,
2897 			 struct scatterlist *sg, unsigned int num,
2898 			 void *data,
2899 			 gfp_t gfp)
2900 {
2901 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp, 0);
2902 }
2903 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2904 
2905 /**
2906  * virtqueue_add_outbuf_premapped - expose output buffers to other end
2907  * @vq: the struct virtqueue we're talking about.
2908  * @sg: scatterlist (must be well-formed and terminated!)
2909  * @num: the number of entries in @sg readable by other side
2910  * @data: the token identifying the buffer.
2911  * @gfp: how to do memory allocations (if necessary).
2912  *
2913  * Caller must ensure we don't call this with other virtqueue operations
2914  * at the same time (except where noted).
2915  *
2916  * Return:
2917  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2918  */
2919 int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
2920 				   struct scatterlist *sg, unsigned int num,
2921 				   void *data,
2922 				   gfp_t gfp)
2923 {
2924 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp, 0);
2925 }
2926 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
2927 
2928 /**
2929  * virtqueue_add_inbuf - expose input buffers to other end
2930  * @vq: the struct virtqueue we're talking about.
2931  * @sg: scatterlist (must be well-formed and terminated!)
2932  * @num: the number of entries in @sg writable by other side
2933  * @data: the token identifying the buffer.
2934  * @gfp: how to do memory allocations (if necessary).
2935  *
2936  * Caller must ensure we don't call this with other virtqueue operations
2937  * at the same time (except where noted).
2938  *
2939  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2940  */
2941 int virtqueue_add_inbuf(struct virtqueue *vq,
2942 			struct scatterlist *sg, unsigned int num,
2943 			void *data,
2944 			gfp_t gfp)
2945 {
2946 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 0);
2947 }
2948 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2949 
2950 /**
2951  * virtqueue_add_inbuf_cache_clean - expose input buffers with cache clean
2952  * @vq: the struct virtqueue we're talking about.
2953  * @sg: scatterlist (must be well-formed and terminated!)
2954  * @num: the number of entries in @sg writable by other side
2955  * @data: the token identifying the buffer.
2956  * @gfp: how to do memory allocations (if necessary).
2957  *
2958  * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
2959  * to indicate that the CPU will not dirty any cacheline overlapping this buffer
2960  * while it is available, and to suppress overlapping cacheline warnings in DMA
2961  * debug builds.
2962  *
2963  * Caller must ensure we don't call this with other virtqueue operations
2964  * at the same time (except where noted).
2965  *
2966  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2967  */
2968 int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
2969 				    struct scatterlist *sg, unsigned int num,
2970 				    void *data,
2971 				    gfp_t gfp)
2972 {
2973 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
2974 			     DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
2975 }
2976 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
2977 
2978 /**
2979  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2980  * @vq: the struct virtqueue we're talking about.
2981  * @sg: scatterlist (must be well-formed and terminated!)
2982  * @num: the number of entries in @sg writable by other side
2983  * @data: the token identifying the buffer.
2984  * @ctx: extra context for the token
2985  * @gfp: how to do memory allocations (if necessary).
2986  *
2987  * Caller must ensure we don't call this with other virtqueue operations
2988  * at the same time (except where noted).
2989  *
2990  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2991  */
2992 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2993 			struct scatterlist *sg, unsigned int num,
2994 			void *data,
2995 			void *ctx,
2996 			gfp_t gfp)
2997 {
2998 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp, 0);
2999 }
3000 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
3001 
3002 /**
3003  * virtqueue_add_inbuf_premapped - expose input buffers to other end
3004  * @vq: the struct virtqueue we're talking about.
3005  * @sg: scatterlist (must be well-formed and terminated!)
3006  * @num: the number of entries in @sg writable by other side
3007  * @data: the token identifying the buffer.
3008  * @ctx: extra context for the token
3009  * @gfp: how to do memory allocations (if necessary).
3010  *
3011  * Caller must ensure we don't call this with other virtqueue operations
3012  * at the same time (except where noted).
3013  *
3014  * Return:
3015  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
3016  */
3017 int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
3018 				  struct scatterlist *sg, unsigned int num,
3019 				  void *data,
3020 				  void *ctx,
3021 				  gfp_t gfp)
3022 {
3023 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp, 0);
3024 }
3025 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
3026 
3027 /**
3028  * virtqueue_dma_dev - get the dma dev
3029  * @_vq: the struct virtqueue we're talking about.
3030  *
3031  * Returns the dma dev. That can been used for dma api.
3032  */
3033 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
3034 {
3035 	struct vring_virtqueue *vq = to_vvq(_vq);
3036 
3037 	if (vq->use_map_api && !_vq->vdev->map)
3038 		return vq->map.dma_dev;
3039 	else
3040 		return NULL;
3041 }
3042 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
3043 
3044 /**
3045  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
3046  * @_vq: the struct virtqueue
3047  *
3048  * Instead of virtqueue_kick(), you can do:
3049  *	if (virtqueue_kick_prepare(vq))
3050  *		virtqueue_notify(vq);
3051  *
3052  * This is sometimes useful because the virtqueue_kick_prepare() needs
3053  * to be serialized, but the actual virtqueue_notify() call does not.
3054  */
3055 bool virtqueue_kick_prepare(struct virtqueue *_vq)
3056 {
3057 	struct vring_virtqueue *vq = to_vvq(_vq);
3058 
3059 	return VIRTQUEUE_CALL(vq, kick_prepare);
3060 }
3061 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
3062 
3063 /**
3064  * virtqueue_notify - second half of split virtqueue_kick call.
3065  * @_vq: the struct virtqueue
3066  *
3067  * This does not need to be serialized.
3068  *
3069  * Returns false if host notify failed or queue is broken, otherwise true.
3070  */
3071 bool virtqueue_notify(struct virtqueue *_vq)
3072 {
3073 	struct vring_virtqueue *vq = to_vvq(_vq);
3074 
3075 	if (unlikely(vq->broken))
3076 		return false;
3077 
3078 	/* Prod other side to tell it about changes. */
3079 	if (!vq->notify(_vq)) {
3080 		vq->broken = true;
3081 		return false;
3082 	}
3083 	return true;
3084 }
3085 EXPORT_SYMBOL_GPL(virtqueue_notify);
3086 
3087 /**
3088  * virtqueue_kick - update after add_buf
3089  * @vq: the struct virtqueue
3090  *
3091  * After one or more virtqueue_add_* calls, invoke this to kick
3092  * the other side.
3093  *
3094  * Caller must ensure we don't call this with other virtqueue
3095  * operations at the same time (except where noted).
3096  *
3097  * Returns false if kick failed, otherwise true.
3098  */
3099 bool virtqueue_kick(struct virtqueue *vq)
3100 {
3101 	if (virtqueue_kick_prepare(vq))
3102 		return virtqueue_notify(vq);
3103 	return true;
3104 }
3105 EXPORT_SYMBOL_GPL(virtqueue_kick);
3106 
3107 /**
3108  * virtqueue_get_buf_ctx - get the next used buffer
3109  * @_vq: the struct virtqueue we're talking about.
3110  * @len: the length written into the buffer
3111  * @ctx: extra context for the token
3112  *
3113  * If the device wrote data into the buffer, @len will be set to the
3114  * amount written.  This means you don't need to clear the buffer
3115  * beforehand to ensure there's no data leakage in the case of short
3116  * writes.
3117  *
3118  * Caller must ensure we don't call this with other virtqueue
3119  * operations at the same time (except where noted).
3120  *
3121  * Returns NULL if there are no used buffers, or the "data" token
3122  * handed to virtqueue_add_*().
3123  */
3124 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
3125 			    void **ctx)
3126 {
3127 	struct vring_virtqueue *vq = to_vvq(_vq);
3128 
3129 	return VIRTQUEUE_CALL(vq, get, len, ctx);
3130 }
3131 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
3132 
3133 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
3134 {
3135 	return virtqueue_get_buf_ctx(_vq, len, NULL);
3136 }
3137 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
3138 /**
3139  * virtqueue_disable_cb - disable callbacks
3140  * @_vq: the struct virtqueue we're talking about.
3141  *
3142  * Note that this is not necessarily synchronous, hence unreliable and only
3143  * useful as an optimization.
3144  *
3145  * Unlike other operations, this need not be serialized.
3146  */
3147 void virtqueue_disable_cb(struct virtqueue *_vq)
3148 {
3149 	struct vring_virtqueue *vq = to_vvq(_vq);
3150 
3151 	VOID_VIRTQUEUE_CALL(vq, disable_cb);
3152 }
3153 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
3154 
3155 /**
3156  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
3157  * @_vq: the struct virtqueue we're talking about.
3158  *
3159  * This re-enables callbacks; it returns current queue state
3160  * in an opaque unsigned value. This value should be later tested by
3161  * virtqueue_poll, to detect a possible race between the driver checking for
3162  * more work, and enabling callbacks.
3163  *
3164  * Caller must ensure we don't call this with other virtqueue
3165  * operations at the same time (except where noted).
3166  */
3167 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
3168 {
3169 	struct vring_virtqueue *vq = to_vvq(_vq);
3170 
3171 	if (vq->event_triggered)
3172 		vq->event_triggered = false;
3173 
3174 	return VIRTQUEUE_CALL(vq, enable_cb_prepare);
3175 }
3176 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
3177 
3178 /**
3179  * virtqueue_poll - query pending used buffers
3180  * @_vq: the struct virtqueue we're talking about.
3181  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
3182  *
3183  * Returns "true" if there are pending used buffers in the queue.
3184  *
3185  * This does not need to be serialized.
3186  */
3187 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
3188 {
3189 	struct vring_virtqueue *vq = to_vvq(_vq);
3190 
3191 	if (unlikely(vq->broken))
3192 		return false;
3193 
3194 	virtio_mb(vq->weak_barriers);
3195 
3196 	return VIRTQUEUE_CALL(vq, poll, last_used_idx);
3197 }
3198 EXPORT_SYMBOL_GPL(virtqueue_poll);
3199 
3200 /**
3201  * virtqueue_enable_cb - restart callbacks after disable_cb.
3202  * @_vq: the struct virtqueue we're talking about.
3203  *
3204  * This re-enables callbacks; it returns "false" if there are pending
3205  * buffers in the queue, to detect a possible race between the driver
3206  * checking for more work, and enabling callbacks.
3207  *
3208  * Caller must ensure we don't call this with other virtqueue
3209  * operations at the same time (except where noted).
3210  */
3211 bool virtqueue_enable_cb(struct virtqueue *_vq)
3212 {
3213 	unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
3214 
3215 	return !virtqueue_poll(_vq, last_used_idx);
3216 }
3217 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
3218 
3219 /**
3220  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
3221  * @_vq: the struct virtqueue we're talking about.
3222  *
3223  * This re-enables callbacks but hints to the other side to delay
3224  * interrupts until most of the available buffers have been processed;
3225  * it returns "false" if there are many pending buffers in the queue,
3226  * to detect a possible race between the driver checking for more work,
3227  * and enabling callbacks.
3228  *
3229  * Caller must ensure we don't call this with other virtqueue
3230  * operations at the same time (except where noted).
3231  */
3232 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
3233 {
3234 	struct vring_virtqueue *vq = to_vvq(_vq);
3235 
3236 	if (vq->event_triggered)
3237 		data_race(vq->event_triggered = false);
3238 
3239 	return VIRTQUEUE_CALL(vq, enable_cb_delayed);
3240 }
3241 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
3242 
3243 /**
3244  * virtqueue_detach_unused_buf - detach first unused buffer
3245  * @_vq: the struct virtqueue we're talking about.
3246  *
3247  * Returns NULL or the "data" token handed to virtqueue_add_*().
3248  * This is not valid on an active queue; it is useful for device
3249  * shutdown or the reset queue.
3250  */
3251 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
3252 {
3253 	struct vring_virtqueue *vq = to_vvq(_vq);
3254 
3255 	return VIRTQUEUE_CALL(vq, detach_unused_buf);
3256 }
3257 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
3258 
3259 static inline bool more_used(const struct vring_virtqueue *vq)
3260 {
3261 	return VIRTQUEUE_CALL(vq, more_used);
3262 }
3263 
3264 /**
3265  * vring_interrupt - notify a virtqueue on an interrupt
3266  * @irq: the IRQ number (ignored)
3267  * @_vq: the struct virtqueue to notify
3268  *
3269  * Calls the callback function of @_vq to process the virtqueue
3270  * notification.
3271  */
3272 irqreturn_t vring_interrupt(int irq, void *_vq)
3273 {
3274 	struct vring_virtqueue *vq = to_vvq(_vq);
3275 
3276 	if (!more_used(vq)) {
3277 		pr_debug("virtqueue interrupt with no work for %p\n", vq);
3278 		return IRQ_NONE;
3279 	}
3280 
3281 	if (unlikely(vq->broken)) {
3282 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
3283 		dev_warn_once(&vq->vq.vdev->dev,
3284 			      "virtio vring IRQ raised before DRIVER_OK");
3285 		return IRQ_NONE;
3286 #else
3287 		return IRQ_HANDLED;
3288 #endif
3289 	}
3290 
3291 	/* Just a hint for performance: so it's ok that this can be racy! */
3292 	if (vq->event)
3293 		data_race(vq->event_triggered = true);
3294 
3295 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
3296 	if (vq->vq.callback)
3297 		vq->vq.callback(&vq->vq);
3298 
3299 	return IRQ_HANDLED;
3300 }
3301 EXPORT_SYMBOL_GPL(vring_interrupt);
3302 
3303 struct virtqueue *vring_create_virtqueue(
3304 	unsigned int index,
3305 	unsigned int num,
3306 	unsigned int vring_align,
3307 	struct virtio_device *vdev,
3308 	bool weak_barriers,
3309 	bool may_reduce_num,
3310 	bool context,
3311 	bool (*notify)(struct virtqueue *),
3312 	void (*callback)(struct virtqueue *),
3313 	const char *name)
3314 {
3315 	union virtio_map map = {.dma_dev = vdev->dev.parent};
3316 
3317 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
3318 		return vring_create_virtqueue_packed(index, num, vring_align,
3319 				vdev, weak_barriers, may_reduce_num,
3320 				context, notify, callback, name, map);
3321 
3322 	return vring_create_virtqueue_split(index, num, vring_align,
3323 			vdev, weak_barriers, may_reduce_num,
3324 			context, notify, callback, name, map);
3325 }
3326 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
3327 
3328 struct virtqueue *vring_create_virtqueue_map(
3329 	unsigned int index,
3330 	unsigned int num,
3331 	unsigned int vring_align,
3332 	struct virtio_device *vdev,
3333 	bool weak_barriers,
3334 	bool may_reduce_num,
3335 	bool context,
3336 	bool (*notify)(struct virtqueue *),
3337 	void (*callback)(struct virtqueue *),
3338 	const char *name,
3339 	union virtio_map map)
3340 {
3341 
3342 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
3343 		return vring_create_virtqueue_packed(index, num, vring_align,
3344 				vdev, weak_barriers, may_reduce_num,
3345 				context, notify, callback, name, map);
3346 
3347 	return vring_create_virtqueue_split(index, num, vring_align,
3348 			vdev, weak_barriers, may_reduce_num,
3349 			context, notify, callback, name, map);
3350 }
3351 EXPORT_SYMBOL_GPL(vring_create_virtqueue_map);
3352 
3353 /**
3354  * virtqueue_resize - resize the vring of vq
3355  * @_vq: the struct virtqueue we're talking about.
3356  * @num: new ring num
3357  * @recycle: callback to recycle unused buffers
3358  * @recycle_done: callback to be invoked when recycle for all unused buffers done
3359  *
3360  * When it is really necessary to create a new vring, it will set the current vq
3361  * into the reset state. Then call the passed callback to recycle the buffer
3362  * that is no longer used. Only after the new vring is successfully created, the
3363  * old vring will be released.
3364  *
3365  * Caller must ensure we don't call this with other virtqueue operations
3366  * at the same time (except where noted).
3367  *
3368  * Returns zero or a negative error.
3369  * 0: success.
3370  * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
3371  *  vq can still work normally
3372  * -EBUSY: Failed to sync with device, vq may not work properly
3373  * -ENOENT: Transport or device not supported
3374  * -E2BIG/-EINVAL: num error
3375  * -EPERM: Operation not permitted
3376  *
3377  */
3378 int virtqueue_resize(struct virtqueue *_vq, u32 num,
3379 		     void (*recycle)(struct virtqueue *vq, void *buf),
3380 		     void (*recycle_done)(struct virtqueue *vq))
3381 {
3382 	struct vring_virtqueue *vq = to_vvq(_vq);
3383 	int err, err_reset;
3384 
3385 	if (num > vq->vq.num_max)
3386 		return -E2BIG;
3387 
3388 	if (!num)
3389 		return -EINVAL;
3390 
3391 	if (virtqueue_get_vring_size(_vq) == num)
3392 		return 0;
3393 
3394 	err = virtqueue_disable_and_recycle(_vq, recycle);
3395 	if (err)
3396 		return err;
3397 	if (recycle_done)
3398 		recycle_done(_vq);
3399 
3400 	err = VIRTQUEUE_CALL(vq, resize, num);
3401 
3402 	err_reset = virtqueue_enable_after_reset(_vq);
3403 	if (err_reset)
3404 		return err_reset;
3405 
3406 	return err;
3407 }
3408 EXPORT_SYMBOL_GPL(virtqueue_resize);
3409 
3410 /**
3411  * virtqueue_reset - detach and recycle all unused buffers
3412  * @_vq: the struct virtqueue we're talking about.
3413  * @recycle: callback to recycle unused buffers
3414  * @recycle_done: callback to be invoked when recycle for all unused buffers done
3415  *
3416  * Caller must ensure we don't call this with other virtqueue operations
3417  * at the same time (except where noted).
3418  *
3419  * Returns zero or a negative error.
3420  * 0: success.
3421  * -EBUSY: Failed to sync with device, vq may not work properly
3422  * -ENOENT: Transport or device not supported
3423  * -EPERM: Operation not permitted
3424  */
3425 int virtqueue_reset(struct virtqueue *_vq,
3426 		    void (*recycle)(struct virtqueue *vq, void *buf),
3427 		    void (*recycle_done)(struct virtqueue *vq))
3428 {
3429 	struct vring_virtqueue *vq = to_vvq(_vq);
3430 	int err;
3431 
3432 	err = virtqueue_disable_and_recycle(_vq, recycle);
3433 	if (err)
3434 		return err;
3435 	if (recycle_done)
3436 		recycle_done(_vq);
3437 
3438 	VOID_VIRTQUEUE_CALL(vq, reset);
3439 
3440 	return virtqueue_enable_after_reset(_vq);
3441 }
3442 EXPORT_SYMBOL_GPL(virtqueue_reset);
3443 
3444 struct virtqueue *vring_new_virtqueue(unsigned int index,
3445 				      unsigned int num,
3446 				      unsigned int vring_align,
3447 				      struct virtio_device *vdev,
3448 				      bool weak_barriers,
3449 				      bool context,
3450 				      void *pages,
3451 				      bool (*notify)(struct virtqueue *vq),
3452 				      void (*callback)(struct virtqueue *vq),
3453 				      const char *name)
3454 {
3455 	struct vring_virtqueue_split vring_split = {};
3456 	union virtio_map map = {.dma_dev = vdev->dev.parent};
3457 
3458 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3459 		struct vring_virtqueue_packed vring_packed = {};
3460 
3461 		vring_packed.vring.num = num;
3462 		vring_packed.vring.desc = pages;
3463 		return __vring_new_virtqueue_packed(index, &vring_packed,
3464 						    vdev, weak_barriers,
3465 						    context, notify, callback,
3466 						    name, map);
3467 	}
3468 
3469 	vring_init(&vring_split.vring, num, pages, vring_align);
3470 	return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
3471 				     context, notify, callback, name,
3472 				     map);
3473 }
3474 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
3475 
3476 static void vring_free(struct virtqueue *_vq)
3477 {
3478 	struct vring_virtqueue *vq = to_vvq(_vq);
3479 
3480 	if (vq->we_own_ring) {
3481 		if (virtqueue_is_packed(vq)) {
3482 			vring_free_queue(vq->vq.vdev,
3483 					 vq->packed.ring_size_in_bytes,
3484 					 vq->packed.vring.desc,
3485 					 vq->packed.ring_dma_addr,
3486 					 vq->map);
3487 
3488 			vring_free_queue(vq->vq.vdev,
3489 					 vq->packed.event_size_in_bytes,
3490 					 vq->packed.vring.driver,
3491 					 vq->packed.driver_event_dma_addr,
3492 					 vq->map);
3493 
3494 			vring_free_queue(vq->vq.vdev,
3495 					 vq->packed.event_size_in_bytes,
3496 					 vq->packed.vring.device,
3497 					 vq->packed.device_event_dma_addr,
3498 					 vq->map);
3499 
3500 			kfree(vq->packed.desc_state);
3501 			kfree(vq->packed.desc_extra);
3502 		} else {
3503 			vring_free_queue(vq->vq.vdev,
3504 					 vq->split.queue_size_in_bytes,
3505 					 vq->split.vring.desc,
3506 					 vq->split.queue_dma_addr,
3507 					 vq->map);
3508 		}
3509 	}
3510 	if (!virtqueue_is_packed(vq)) {
3511 		kfree(vq->split.desc_state);
3512 		kfree(vq->split.desc_extra);
3513 	}
3514 }
3515 
3516 void vring_del_virtqueue(struct virtqueue *_vq)
3517 {
3518 	struct vring_virtqueue *vq = to_vvq(_vq);
3519 
3520 	spin_lock(&vq->vq.vdev->vqs_list_lock);
3521 	list_del(&_vq->list);
3522 	spin_unlock(&vq->vq.vdev->vqs_list_lock);
3523 
3524 	vring_free(_vq);
3525 
3526 	kfree(vq);
3527 }
3528 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
3529 
3530 u32 vring_notification_data(struct virtqueue *_vq)
3531 {
3532 	struct vring_virtqueue *vq = to_vvq(_vq);
3533 	u16 next;
3534 
3535 	if (virtqueue_is_packed(vq))
3536 		next = (vq->packed.next_avail_idx &
3537 				~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
3538 			vq->packed.avail_wrap_counter <<
3539 				VRING_PACKED_EVENT_F_WRAP_CTR;
3540 	else
3541 		next = vq->split.avail_idx_shadow;
3542 
3543 	return next << 16 | _vq->index;
3544 }
3545 EXPORT_SYMBOL_GPL(vring_notification_data);
3546 
3547 /* Manipulates transport-specific feature bits. */
3548 void vring_transport_features(struct virtio_device *vdev)
3549 {
3550 	unsigned int i;
3551 
3552 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
3553 		switch (i) {
3554 		case VIRTIO_RING_F_INDIRECT_DESC:
3555 			break;
3556 		case VIRTIO_RING_F_EVENT_IDX:
3557 			break;
3558 		case VIRTIO_F_VERSION_1:
3559 			break;
3560 		case VIRTIO_F_ACCESS_PLATFORM:
3561 			break;
3562 		case VIRTIO_F_RING_PACKED:
3563 			break;
3564 		case VIRTIO_F_ORDER_PLATFORM:
3565 			break;
3566 		case VIRTIO_F_NOTIFICATION_DATA:
3567 			break;
3568 		case VIRTIO_F_IN_ORDER:
3569 			break;
3570 		default:
3571 			/* We don't understand this bit. */
3572 			__virtio_clear_bit(vdev, i);
3573 		}
3574 	}
3575 }
3576 EXPORT_SYMBOL_GPL(vring_transport_features);
3577 
3578 /**
3579  * virtqueue_get_vring_size - return the size of the virtqueue's vring
3580  * @_vq: the struct virtqueue containing the vring of interest.
3581  *
3582  * Returns the size of the vring.  This is mainly used for boasting to
3583  * userspace.  Unlike other operations, this need not be serialized.
3584  */
3585 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
3586 {
3587 
3588 	const struct vring_virtqueue *vq = to_vvq(_vq);
3589 
3590 	return virtqueue_is_packed(vq) ? vq->packed.vring.num :
3591 				      vq->split.vring.num;
3592 }
3593 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
3594 
3595 /*
3596  * This function should only be called by the core, not directly by the driver.
3597  */
3598 void __virtqueue_break(struct virtqueue *_vq)
3599 {
3600 	struct vring_virtqueue *vq = to_vvq(_vq);
3601 
3602 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3603 	WRITE_ONCE(vq->broken, true);
3604 }
3605 EXPORT_SYMBOL_GPL(__virtqueue_break);
3606 
3607 /*
3608  * This function should only be called by the core, not directly by the driver.
3609  */
3610 void __virtqueue_unbreak(struct virtqueue *_vq)
3611 {
3612 	struct vring_virtqueue *vq = to_vvq(_vq);
3613 
3614 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3615 	WRITE_ONCE(vq->broken, false);
3616 }
3617 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3618 
3619 bool virtqueue_is_broken(const struct virtqueue *_vq)
3620 {
3621 	const struct vring_virtqueue *vq = to_vvq(_vq);
3622 
3623 	return READ_ONCE(vq->broken);
3624 }
3625 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3626 
3627 /*
3628  * This should prevent the device from being used, allowing drivers to
3629  * recover.  You may need to grab appropriate locks to flush.
3630  */
3631 void virtio_break_device(struct virtio_device *dev)
3632 {
3633 	struct virtqueue *_vq;
3634 
3635 	spin_lock(&dev->vqs_list_lock);
3636 	list_for_each_entry(_vq, &dev->vqs, list) {
3637 		struct vring_virtqueue *vq = to_vvq(_vq);
3638 
3639 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3640 		WRITE_ONCE(vq->broken, true);
3641 	}
3642 	spin_unlock(&dev->vqs_list_lock);
3643 }
3644 EXPORT_SYMBOL_GPL(virtio_break_device);
3645 
3646 /*
3647  * This should allow the device to be used by the driver. You may
3648  * need to grab appropriate locks to flush the write to
3649  * vq->broken. This should only be used in some specific case e.g
3650  * (probing and restoring). This function should only be called by the
3651  * core, not directly by the driver.
3652  */
3653 void __virtio_unbreak_device(struct virtio_device *dev)
3654 {
3655 	struct virtqueue *_vq;
3656 
3657 	spin_lock(&dev->vqs_list_lock);
3658 	list_for_each_entry(_vq, &dev->vqs, list) {
3659 		struct vring_virtqueue *vq = to_vvq(_vq);
3660 
3661 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3662 		WRITE_ONCE(vq->broken, false);
3663 	}
3664 	spin_unlock(&dev->vqs_list_lock);
3665 }
3666 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3667 
3668 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3669 {
3670 	const struct vring_virtqueue *vq = to_vvq(_vq);
3671 
3672 	BUG_ON(!vq->we_own_ring);
3673 
3674 	if (virtqueue_is_packed(vq))
3675 		return vq->packed.ring_dma_addr;
3676 
3677 	return vq->split.queue_dma_addr;
3678 }
3679 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3680 
3681 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3682 {
3683 	const struct vring_virtqueue *vq = to_vvq(_vq);
3684 
3685 	BUG_ON(!vq->we_own_ring);
3686 
3687 	if (virtqueue_is_packed(vq))
3688 		return vq->packed.driver_event_dma_addr;
3689 
3690 	return vq->split.queue_dma_addr +
3691 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3692 }
3693 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3694 
3695 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3696 {
3697 	const struct vring_virtqueue *vq = to_vvq(_vq);
3698 
3699 	BUG_ON(!vq->we_own_ring);
3700 
3701 	if (virtqueue_is_packed(vq))
3702 		return vq->packed.device_event_dma_addr;
3703 
3704 	return vq->split.queue_dma_addr +
3705 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3706 }
3707 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
3708 
3709 /* Only available for split ring */
3710 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3711 {
3712 	return &to_vvq(vq)->split.vring;
3713 }
3714 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3715 
3716 /**
3717  * virtqueue_map_alloc_coherent - alloc coherent mapping
3718  * @vdev: the virtio device we are talking to
3719  * @map: metadata for performing mapping
3720  * @size: the size of the buffer
3721  * @map_handle: the pointer to the mapped address
3722  * @gfp: allocation flag (GFP_XXX)
3723  *
3724  * return virtual address or NULL on error
3725  */
3726 void *virtqueue_map_alloc_coherent(struct virtio_device *vdev,
3727 				   union virtio_map map,
3728 				   size_t size, dma_addr_t *map_handle,
3729 				   gfp_t gfp)
3730 {
3731 	if (vdev->map)
3732 		return vdev->map->alloc(map, size,
3733 					map_handle, gfp);
3734 	else
3735 		return dma_alloc_coherent(map.dma_dev, size,
3736 					  map_handle, gfp);
3737 }
3738 EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent);
3739 
3740 /**
3741  * virtqueue_map_free_coherent - free coherent mapping
3742  * @vdev: the virtio device we are talking to
3743  * @map: metadata for performing mapping
3744  * @size: the size of the buffer
3745  * @vaddr: the virtual address that needs to be freed
3746  * @map_handle: the mapped address that needs to be freed
3747  *
3748  */
3749 void virtqueue_map_free_coherent(struct virtio_device *vdev,
3750 				 union virtio_map map, size_t size, void *vaddr,
3751 				 dma_addr_t map_handle)
3752 {
3753 	if (vdev->map)
3754 		vdev->map->free(map, size, vaddr,
3755 				map_handle, 0);
3756 	else
3757 		dma_free_coherent(map.dma_dev, size, vaddr, map_handle);
3758 }
3759 EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent);
3760 
3761 /**
3762  * virtqueue_map_page_attrs - map a page to the device
3763  * @_vq: the virtqueue we are talking to
3764  * @page: the page that will be mapped by the device
3765  * @offset: the offset in the page for a buffer
3766  * @size: the buffer size
3767  * @dir: mapping direction
3768  * @attrs: mapping attributes
3769  *
3770  * Returns mapped address. Caller should check that by virtqueue_map_mapping_error().
3771  */
3772 dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq,
3773 				    struct page *page,
3774 				    unsigned long offset,
3775 				    size_t size,
3776 				    enum dma_data_direction dir,
3777 				    unsigned long attrs)
3778 {
3779 	const struct vring_virtqueue *vq = to_vvq(_vq);
3780 	struct virtio_device *vdev = _vq->vdev;
3781 
3782 	if (vdev->map)
3783 		return vdev->map->map_page(vq->map,
3784 					   page, offset, size,
3785 					   dir, attrs);
3786 
3787 	return dma_map_page_attrs(vring_dma_dev(vq),
3788 				  page, offset, size,
3789 				  dir, attrs);
3790 }
3791 EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs);
3792 
3793 /**
3794  * virtqueue_unmap_page_attrs - map a page to the device
3795  * @_vq: the virtqueue we are talking to
3796  * @map_handle: the mapped address
3797  * @size: the buffer size
3798  * @dir: mapping direction
3799  * @attrs: unmapping attributes
3800  */
3801 void virtqueue_unmap_page_attrs(const struct virtqueue *_vq,
3802 				dma_addr_t map_handle,
3803 				size_t size, enum dma_data_direction dir,
3804 				unsigned long attrs)
3805 {
3806 	const struct vring_virtqueue *vq = to_vvq(_vq);
3807 	struct virtio_device *vdev = _vq->vdev;
3808 
3809 	if (vdev->map)
3810 		vdev->map->unmap_page(vq->map,
3811 				      map_handle, size, dir, attrs);
3812 	else
3813 		dma_unmap_page_attrs(vring_dma_dev(vq), map_handle,
3814 				     size, dir, attrs);
3815 }
3816 EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs);
3817 
3818 /**
3819  * virtqueue_map_single_attrs - map DMA for _vq
3820  * @_vq: the struct virtqueue we're talking about.
3821  * @ptr: the pointer of the buffer to do dma
3822  * @size: the size of the buffer to do dma
3823  * @dir: DMA direction
3824  * @attrs: DMA Attrs
3825  *
3826  * The caller calls this to do dma mapping in advance. The DMA address can be
3827  * passed to this _vq when it is in pre-mapped mode.
3828  *
3829  * return mapped address. Caller should check that by virtqueue_map_mapping_error().
3830  */
3831 dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr,
3832 				      size_t size,
3833 				      enum dma_data_direction dir,
3834 				      unsigned long attrs)
3835 {
3836 	const struct vring_virtqueue *vq = to_vvq(_vq);
3837 
3838 	if (!vq->use_map_api) {
3839 		kmsan_handle_dma(virt_to_phys(ptr), size, dir);
3840 		return (dma_addr_t)virt_to_phys(ptr);
3841 	}
3842 
3843 	/* DMA must never operate on areas that might be remapped. */
3844 	if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr),
3845 			  "rejecting DMA map of vmalloc memory\n"))
3846 		return DMA_MAPPING_ERROR;
3847 
3848 	return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr),
3849 					offset_in_page(ptr), size, dir, attrs);
3850 }
3851 EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs);
3852 
3853 /**
3854  * virtqueue_unmap_single_attrs - unmap map for _vq
3855  * @_vq: the struct virtqueue we're talking about.
3856  * @addr: the dma address to unmap
3857  * @size: the size of the buffer
3858  * @dir: DMA direction
3859  * @attrs: DMA Attrs
3860  *
3861  * Unmap the address that is mapped by the virtqueue_map_* APIs.
3862  *
3863  */
3864 void virtqueue_unmap_single_attrs(const struct virtqueue *_vq,
3865 				  dma_addr_t addr,
3866 				  size_t size, enum dma_data_direction dir,
3867 				  unsigned long attrs)
3868 {
3869 	const struct vring_virtqueue *vq = to_vvq(_vq);
3870 
3871 	if (!vq->use_map_api)
3872 		return;
3873 
3874 	virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs);
3875 }
3876 EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs);
3877 
3878 /**
3879  * virtqueue_map_mapping_error - check dma address
3880  * @_vq: the struct virtqueue we're talking about.
3881  * @addr: DMA address
3882  *
3883  * Returns 0 means dma valid. Other means invalid dma address.
3884  */
3885 int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr)
3886 {
3887 	const struct vring_virtqueue *vq = to_vvq(_vq);
3888 
3889 	return vring_mapping_error(vq, addr);
3890 }
3891 EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error);
3892 
3893 /**
3894  * virtqueue_map_need_sync - check a dma address needs sync
3895  * @_vq: the struct virtqueue we're talking about.
3896  * @addr: DMA address
3897  *
3898  * Check if the dma address mapped by the virtqueue_map_* APIs needs to be
3899  * synchronized
3900  *
3901  * return bool
3902  */
3903 bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr)
3904 {
3905 	const struct vring_virtqueue *vq = to_vvq(_vq);
3906 	struct virtio_device *vdev = _vq->vdev;
3907 
3908 	if (!vq->use_map_api)
3909 		return false;
3910 
3911 	if (vdev->map)
3912 		return vdev->map->need_sync(vq->map, addr);
3913 	else
3914 		return dma_need_sync(vring_dma_dev(vq), addr);
3915 }
3916 EXPORT_SYMBOL_GPL(virtqueue_map_need_sync);
3917 
3918 /**
3919  * virtqueue_map_sync_single_range_for_cpu - map sync for cpu
3920  * @_vq: the struct virtqueue we're talking about.
3921  * @addr: DMA address
3922  * @offset: DMA address offset
3923  * @size: buf size for sync
3924  * @dir: DMA direction
3925  *
3926  * Before calling this function, use virtqueue_map_need_sync() to confirm that
3927  * the DMA address really needs to be synchronized
3928  *
3929  */
3930 void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq,
3931 					     dma_addr_t addr,
3932 					     unsigned long offset, size_t size,
3933 					     enum dma_data_direction dir)
3934 {
3935 	const struct vring_virtqueue *vq = to_vvq(_vq);
3936 	struct virtio_device *vdev = _vq->vdev;
3937 
3938 	if (!vq->use_map_api)
3939 		return;
3940 
3941 	if (vdev->map)
3942 		vdev->map->sync_single_for_cpu(vq->map,
3943 					       addr + offset, size, dir);
3944 	else
3945 		dma_sync_single_range_for_cpu(vring_dma_dev(vq),
3946 					      addr, offset, size, dir);
3947 }
3948 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu);
3949 
3950 /**
3951  * virtqueue_map_sync_single_range_for_device - map sync for device
3952  * @_vq: the struct virtqueue we're talking about.
3953  * @addr: DMA address
3954  * @offset: DMA address offset
3955  * @size: buf size for sync
3956  * @dir: DMA direction
3957  *
3958  * Before calling this function, use virtqueue_map_need_sync() to confirm that
3959  * the DMA address really needs to be synchronized
3960  */
3961 void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq,
3962 						dma_addr_t addr,
3963 						unsigned long offset, size_t size,
3964 						enum dma_data_direction dir)
3965 {
3966 	const struct vring_virtqueue *vq = to_vvq(_vq);
3967 	struct virtio_device *vdev = _vq->vdev;
3968 
3969 	if (!vq->use_map_api)
3970 		return;
3971 
3972 	if (vdev->map)
3973 		vdev->map->sync_single_for_device(vq->map,
3974 						  addr + offset,
3975 						  size, dir);
3976 	else
3977 		dma_sync_single_range_for_device(vring_dma_dev(vq), addr,
3978 						 offset, size, dir);
3979 }
3980 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device);
3981 
3982 MODULE_DESCRIPTION("Virtio ring implementation");
3983 MODULE_LICENSE("GPL");
3984