xref: /linux/drivers/virtio/virtio_ring.c (revision 8934827db5403eae57d4537114a9ff88b0a8460f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/kmsan.h>
15 #include <linux/spinlock.h>
16 #include <xen/xen.h>
17 
18 #ifdef DEBUG
19 /* For development, we want to crash whenever the ring is screwed. */
20 #define BAD_RING(_vq, fmt, args...)				\
21 	do {							\
22 		dev_err(&(_vq)->vq.vdev->dev,			\
23 			"%s:"fmt, (_vq)->vq.name, ##args);	\
24 		BUG();						\
25 	} while (0)
26 /* Caller is supposed to guarantee no reentry. */
27 #define START_USE(_vq)						\
28 	do {							\
29 		if ((_vq)->in_use)				\
30 			panic("%s:in_use = %i\n",		\
31 			      (_vq)->vq.name, (_vq)->in_use);	\
32 		(_vq)->in_use = __LINE__;			\
33 	} while (0)
34 #define END_USE(_vq) \
35 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
36 #define LAST_ADD_TIME_UPDATE(_vq)				\
37 	do {							\
38 		ktime_t now = ktime_get();			\
39 								\
40 		/* No kick or get, with .1 second between?  Warn. */ \
41 		if ((_vq)->last_add_time_valid)			\
42 			WARN_ON(ktime_to_ms(ktime_sub(now,	\
43 				(_vq)->last_add_time)) > 100);	\
44 		(_vq)->last_add_time = now;			\
45 		(_vq)->last_add_time_valid = true;		\
46 	} while (0)
47 #define LAST_ADD_TIME_CHECK(_vq)				\
48 	do {							\
49 		if ((_vq)->last_add_time_valid) {		\
50 			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
51 				      (_vq)->last_add_time)) > 100); \
52 		}						\
53 	} while (0)
54 #define LAST_ADD_TIME_INVALID(_vq)				\
55 	((_vq)->last_add_time_valid = false)
56 #else
57 #define BAD_RING(_vq, fmt, args...)				\
58 	do {							\
59 		dev_err(&_vq->vq.vdev->dev,			\
60 			"%s:"fmt, (_vq)->vq.name, ##args);	\
61 		(_vq)->broken = true;				\
62 	} while (0)
63 #define START_USE(vq)
64 #define END_USE(vq)
65 #define LAST_ADD_TIME_UPDATE(vq)
66 #define LAST_ADD_TIME_CHECK(vq)
67 #define LAST_ADD_TIME_INVALID(vq)
68 #endif
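/*
 * Illustrative note (not part of the original file): every entry point below
 * brackets its ring accesses with these macros, and callers are expected to
 * serialize operations on a given virtqueue themselves; under DEBUG the
 * macros only verify that assumption. A typical entry point is shaped like:
 *
 *	START_USE(vq);
 *	if (unlikely(vq->broken)) {
 *		END_USE(vq);
 *		return -EIO;
 *	}
 *	...manipulate the ring...
 *	END_USE(vq);
 */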
69 
70 enum vq_layout {
71 	VQ_LAYOUT_SPLIT = 0,
72 	VQ_LAYOUT_PACKED,
73 	VQ_LAYOUT_SPLIT_IN_ORDER,
74 	VQ_LAYOUT_PACKED_IN_ORDER,
75 };
76 
77 struct vring_desc_state_split {
78 	void *data;			/* Data for callback. */
79 
80 	/* Indirect desc table and extra table, if any. These two are
81 	 * allocated together, so we don't put extra stress on the memory allocator.
82 	 */
83 	struct vring_desc *indir_desc;
84 	u32 total_in_len;
85 };
86 
87 struct vring_desc_state_packed {
88 	void *data;			/* Data for callback. */
89 
90 	/* Indirect desc table and extra table, if any. These two are
91 	 * allocated together, so we don't put extra stress on the memory allocator.
92 	 */
93 	struct vring_packed_desc *indir_desc;
94 	u16 num;			/* Descriptor list length. */
95 	u16 last;			/* The last desc state in a list. */
96 	u32 total_in_len;		/* In length for the skipped buffer. */
97 };
98 
99 struct vring_desc_extra {
100 	dma_addr_t addr;		/* Descriptor DMA addr. */
101 	u32 len;			/* Descriptor length. */
102 	u16 flags;			/* Descriptor flags. */
103 	u16 next;			/* The next desc state in a list. */
104 };
105 
106 struct vring_virtqueue_split {
107 	/* Actual memory layout for this queue. */
108 	struct vring vring;
109 
110 	/* Last written value to avail->flags */
111 	u16 avail_flags_shadow;
112 
113 	/*
114 	 * Last written value to avail->idx in
115 	 * guest byte order.
116 	 */
117 	u16 avail_idx_shadow;
118 
119 	/* Per-descriptor state. */
120 	struct vring_desc_state_split *desc_state;
121 	struct vring_desc_extra *desc_extra;
122 
123 	/* DMA address and size information */
124 	dma_addr_t queue_dma_addr;
125 	size_t queue_size_in_bytes;
126 
127 	/*
128 	 * The parameters for creating vrings are reserved for creating new
129 	 * vring.
130 	 */
131 	u32 vring_align;
132 	bool may_reduce_num;
133 };
134 
135 struct vring_virtqueue_packed {
136 	/* Actual memory layout for this queue. */
137 	struct {
138 		unsigned int num;
139 		struct vring_packed_desc *desc;
140 		struct vring_packed_desc_event *driver;
141 		struct vring_packed_desc_event *device;
142 	} vring;
143 
144 	/* Driver ring wrap counter. */
145 	bool avail_wrap_counter;
146 
147 	/* Avail used flags. */
148 	u16 avail_used_flags;
149 
150 	/* Index of the next avail descriptor. */
151 	u16 next_avail_idx;
152 
153 	/*
154 	 * Last written value to driver->flags in
155 	 * guest byte order.
156 	 */
157 	u16 event_flags_shadow;
158 
159 	/* Per-descriptor state. */
160 	struct vring_desc_state_packed *desc_state;
161 	struct vring_desc_extra *desc_extra;
162 
163 	/* DMA address and size information */
164 	dma_addr_t ring_dma_addr;
165 	dma_addr_t driver_event_dma_addr;
166 	dma_addr_t device_event_dma_addr;
167 	size_t ring_size_in_bytes;
168 	size_t event_size_in_bytes;
169 };
170 
171 struct vring_virtqueue;
172 
173 struct virtqueue_ops {
174 	int (*add)(struct vring_virtqueue *vq, struct scatterlist *sgs[],
175 		   unsigned int total_sg, unsigned int out_sgs,
176 		   unsigned int in_sgs,	void *data,
177 		   void *ctx, bool premapped, gfp_t gfp,
178 		   unsigned long attr);
179 	void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx);
180 	bool (*kick_prepare)(struct vring_virtqueue *vq);
181 	void (*disable_cb)(struct vring_virtqueue *vq);
182 	bool (*enable_cb_delayed)(struct vring_virtqueue *vq);
183 	unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq);
184 	bool (*poll)(const struct vring_virtqueue *vq,
185 		     unsigned int last_used_idx);
186 	void *(*detach_unused_buf)(struct vring_virtqueue *vq);
187 	bool (*more_used)(const struct vring_virtqueue *vq);
188 	int (*resize)(struct vring_virtqueue *vq, u32 num);
189 	void (*reset)(struct vring_virtqueue *vq);
190 };
191 
192 struct vring_virtqueue {
193 	struct virtqueue vq;
194 
195 	/* Is DMA API used? */
196 	bool use_map_api;
197 
198 	/* Can we use weak barriers? */
199 	bool weak_barriers;
200 
201 	/* Other side has made a mess, don't try any more. */
202 	bool broken;
203 
204 	/* Host supports indirect buffers */
205 	bool indirect;
206 
207 	/* Host publishes avail event idx */
208 	bool event;
209 
210 	enum vq_layout layout;
211 
212 	/*
213 	 * Without IN_ORDER it's the head of free buffer list. With
214 	 * IN_ORDER and SPLIT, it's the next available buffer
215 	 * index. With IN_ORDER and PACKED, it's unused.
216 	 */
217 	unsigned int free_head;
218 
219 	/*
220 	 * With IN_ORDER, once we see an in-order batch, this stores
221 	 * its last used entry until we return the last buffer of that batch.
222 	 * After that, id is set to UINT_MAX to mark it invalid.
223 	 * Unused without IN_ORDER.
224 	 */
225 	struct used_entry {
226 		u32 id;
227 		u32 len;
228 	} batch_last;
229 
230 	/* Number we've added since last sync. */
231 	unsigned int num_added;
232 
233 	/* Last used index we've seen.
234 	 * For the split ring, it just contains the last used index.
235 	 * For the packed ring:
236 	 * bits below VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
237 	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
238 	 */
239 	u16 last_used_idx;
240 
241 	/* With IN_ORDER and SPLIT, the last descriptor id we used to
242 	 * detach a buffer.
243 	 */
244 	u16 last_used;
245 
246 	/* Hint for event idx: already triggered no need to disable. */
247 	bool event_triggered;
248 
249 	union {
250 		/* Available for split ring */
251 		struct vring_virtqueue_split split;
252 
253 		/* Available for packed ring */
254 		struct vring_virtqueue_packed packed;
255 	};
256 
257 	/* How to notify other side. FIXME: commonalize hcalls! */
258 	bool (*notify)(struct virtqueue *vq);
259 
260 	/* DMA, allocation, and size information */
261 	bool we_own_ring;
262 
263 	union virtio_map map;
264 
265 #ifdef DEBUG
266 	/* They're supposed to lock for us. */
267 	unsigned int in_use;
268 
269 	/* Figure out if their kicks are too delayed. */
270 	bool last_add_time_valid;
271 	ktime_t last_add_time;
272 #endif
273 };
274 
275 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
276 static void vring_free(struct virtqueue *_vq);
277 
278 /*
279  * Helpers.
280  */
281 
282 #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
283 
284 
285 static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq)
286 {
287 	return vq->layout == VQ_LAYOUT_PACKED ||
288 	       vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
289 }
290 
291 static inline bool virtqueue_is_in_order(const struct vring_virtqueue *vq)
292 {
293 	return vq->layout == VQ_LAYOUT_SPLIT_IN_ORDER ||
294 	       vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
295 }
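/*
 * Illustrative note (not part of the original file): vq->layout is derived
 * from the negotiated feature bits when the queue is created. The split case
 * is visible in __vring_new_virtqueue_split() below; the packed constructor
 * (not shown in this excerpt) presumably keys off VIRTIO_F_RING_PACKED the
 * same way:
 *
 *	bool in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
 *
 *	vq->layout = packed ?
 *		     (in_order ? VQ_LAYOUT_PACKED_IN_ORDER : VQ_LAYOUT_PACKED) :
 *		     (in_order ? VQ_LAYOUT_SPLIT_IN_ORDER : VQ_LAYOUT_SPLIT);
 */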
296 
297 static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
298 				   unsigned int total_sg)
299 {
300 	/*
301 	 * If the host supports indirect descriptor tables, and we have multiple
302 	 * buffers, then go indirect. FIXME: tune this threshold
303 	 */
304 	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
305 }
306 
307 /*
308  * Modern virtio devices have feature bits to specify whether they need a
309  * quirk and bypass the IOMMU. If not there, just use the DMA API.
310  *
311  * If there, the interaction between virtio and DMA API is messy.
312  *
313  * On most systems with virtio, physical addresses match bus addresses,
314  * and it doesn't particularly matter whether we use the DMA API.
315  *
316  * On some systems, including Xen and any system with a physical device
317  * that speaks virtio behind a physical IOMMU, we must use the DMA API
318  * for virtio DMA to work at all.
319  *
320  * On other systems, including SPARC and PPC64, virtio-pci devices are
321  * enumerated as though they are behind an IOMMU, but the virtio host
322  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
323  * there or somehow map everything as the identity.
324  *
325  * For the time being, we preserve historic behavior and bypass the DMA
326  * API.
327  *
328  * TODO: install a per-device DMA ops structure that does the right thing
329  * taking into account all the above quirks, and use the DMA API
330  * unconditionally on data path.
331  */
332 
333 static bool vring_use_map_api(const struct virtio_device *vdev)
334 {
335 	if (!virtio_has_dma_quirk(vdev))
336 		return true;
337 
338 	/* Otherwise, we are left to guess. */
339 	/*
340 	 * In theory, it's possible to have a buggy QEMU-supplied
341 	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
342 	 * such a configuration, virtio has never worked and will
343 	 * not work without an even larger kludge.  Instead, enable
344 	 * the DMA API if we're a Xen guest, which at least allows
345 	 * all of the sensible Xen configurations to work correctly.
346 	 */
347 	if (xen_domain())
348 		return true;
349 
350 	return false;
351 }
352 
353 static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
354 				    const struct vring_desc_extra *extra)
355 {
356 	return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR);
357 }
358 
359 size_t virtio_max_dma_size(const struct virtio_device *vdev)
360 {
361 	size_t max_segment_size = SIZE_MAX;
362 
363 	if (vring_use_map_api(vdev)) {
364 		if (vdev->map) {
365 			max_segment_size =
366 				vdev->map->max_mapping_size(vdev->vmap);
367 		} else
368 			max_segment_size =
369 				dma_max_mapping_size(vdev->dev.parent);
370 	}
371 
372 	return max_segment_size;
373 }
374 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
375 
376 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
377 			       dma_addr_t *map_handle, gfp_t flag,
378 			       union virtio_map map)
379 {
380 	if (vring_use_map_api(vdev)) {
381 		return virtqueue_map_alloc_coherent(vdev, map, size,
382 						    map_handle, flag);
383 	} else {
384 		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
385 
386 		if (queue) {
387 			phys_addr_t phys_addr = virt_to_phys(queue);
388 			*map_handle = (dma_addr_t)phys_addr;
389 
390 			/*
391 			 * Sanity check: make sure we didn't truncate
392 			 * the address.  The only arches I can find that
393 			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
394 			 * are certain non-highmem MIPS and x86
395 			 * configurations, but these configurations
396 			 * should never allocate physical pages above 32
397 			 * bits, so this is fine.  Just in case, throw a
398 			 * warning and abort if we end up with an
399 			 * unrepresentable address.
400 			 */
401 			if (WARN_ON_ONCE(*map_handle != phys_addr)) {
402 				free_pages_exact(queue, PAGE_ALIGN(size));
403 				return NULL;
404 			}
405 		}
406 		return queue;
407 	}
408 }
409 
410 static void vring_free_queue(struct virtio_device *vdev, size_t size,
411 			     void *queue, dma_addr_t map_handle,
412 			     union virtio_map map)
413 {
414 	if (vring_use_map_api(vdev))
415 		virtqueue_map_free_coherent(vdev, map, size,
416 					    queue, map_handle);
417 	else
418 		free_pages_exact(queue, PAGE_ALIGN(size));
419 }
420 
421 /*
422  * The DMA ops on various arches are rather gnarly right now, and
423  * making all of the arch DMA ops work on the vring device itself
424  * is a mess.
425  */
426 static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
427 {
428 	return vq->map.dma_dev;
429 }
430 
431 static int vring_mapping_error(const struct vring_virtqueue *vq,
432 			       dma_addr_t addr)
433 {
434 	struct virtio_device *vdev = vq->vq.vdev;
435 
436 	if (!vq->use_map_api)
437 		return 0;
438 
439 	if (vdev->map)
440 		return vdev->map->mapping_error(vq->map, addr);
441 	else
442 		return dma_mapping_error(vring_dma_dev(vq), addr);
443 }
444 
445 /* Map one sg entry. */
446 static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
447 			    enum dma_data_direction direction, dma_addr_t *addr,
448 			    u32 *len, bool premapped, unsigned long attr)
449 {
450 	if (premapped) {
451 		*addr = sg_dma_address(sg);
452 		*len = sg_dma_len(sg);
453 		return 0;
454 	}
455 
456 	*len = sg->length;
457 
458 	if (!vq->use_map_api) {
459 		/*
460 		 * If DMA is not used, KMSAN doesn't know that the scatterlist
461 		 * is initialized by the hardware. Explicitly check/unpoison it
462 		 * depending on the direction.
463 		 */
464 		kmsan_handle_dma(sg_phys(sg), sg->length, direction);
465 		*addr = (dma_addr_t)sg_phys(sg);
466 		return 0;
467 	}
468 
469 	/*
470 	 * We can't use dma_map_sg, because we don't use scatterlists in
471 	 * the way it expects (we don't guarantee that the scatterlist
472 	 * will exist for the lifetime of the mapping).
473 	 */
474 	*addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg),
475 					 sg->offset, sg->length,
476 					 direction, attr);
477 
478 	if (vring_mapping_error(vq, *addr))
479 		return -ENOMEM;
480 
481 	return 0;
482 }
483 
484 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
485 				   void *cpu_addr, size_t size,
486 				   enum dma_data_direction direction)
487 {
488 	if (!vq->use_map_api)
489 		return (dma_addr_t)virt_to_phys(cpu_addr);
490 
491 	return virtqueue_map_single_attrs(&vq->vq, cpu_addr,
492 					  size, direction, 0);
493 }
494 
495 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
496 {
497 	vq->vq.num_free = num;
498 
499 	if (virtqueue_is_packed(vq))
500 		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
501 	else
502 		vq->last_used_idx = 0;
503 
504 	vq->last_used = 0;
505 
506 	vq->event_triggered = false;
507 	vq->num_added = 0;
508 
509 #ifdef DEBUG
510 	vq->in_use = false;
511 	vq->last_add_time_valid = false;
512 #endif
513 }
514 
515 
516 /*
517  * Split ring specific functions - *_split().
518  */
519 
520 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
521 					  struct vring_desc_extra *extra)
522 {
523 	u16 flags;
524 
525 	flags = extra->flags;
526 
527 	if (flags & VRING_DESC_F_INDIRECT) {
528 		if (!vq->use_map_api)
529 			goto out;
530 	} else if (!vring_need_unmap_buffer(vq, extra))
531 		goto out;
532 
533 	virtqueue_unmap_page_attrs(&vq->vq,
534 				   extra->addr,
535 				   extra->len,
536 				   (flags & VRING_DESC_F_WRITE) ?
537 				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
538 				   0);
539 
540 out:
541 	return extra->next;
542 }
543 
544 static struct vring_desc *alloc_indirect_split(struct vring_virtqueue *vq,
545 					       unsigned int total_sg,
546 					       gfp_t gfp)
547 {
548 	struct vring_desc_extra *extra;
549 	struct vring_desc *desc;
550 	unsigned int i, size;
551 
552 	/*
553 	 * We require lowmem mappings for the descriptors because
554 	 * otherwise virt_to_phys will give us bogus addresses in the
555 	 * virtqueue.
556 	 */
557 	gfp &= ~__GFP_HIGHMEM;
558 
559 	size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg;
560 
561 	desc = kmalloc(size, gfp);
562 	if (!desc)
563 		return NULL;
564 
565 	extra = (struct vring_desc_extra *)&desc[total_sg];
566 
567 	for (i = 0; i < total_sg; i++)
568 		extra[i].next = i + 1;
569 
570 	return desc;
571 }
572 
573 static inline unsigned int virtqueue_add_desc_split(struct vring_virtqueue *vq,
574 						    struct vring_desc *desc,
575 						    struct vring_desc_extra *extra,
576 						    unsigned int i,
577 						    dma_addr_t addr,
578 						    unsigned int len,
579 						    u16 flags, bool premapped)
580 {
581 	struct virtio_device *vdev = vq->vq.vdev;
582 	u16 next;
583 
584 	desc[i].flags = cpu_to_virtio16(vdev, flags);
585 	desc[i].addr = cpu_to_virtio64(vdev, addr);
586 	desc[i].len = cpu_to_virtio32(vdev, len);
587 
588 	extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
589 	extra[i].len = len;
590 	extra[i].flags = flags;
591 
592 	next = extra[i].next;
593 
594 	desc[i].next = cpu_to_virtio16(vdev, next);
595 
596 	return next;
597 }
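/*
 * Illustrative example (not part of the original file): for a request with
 * two out sg entries and one in sg entry placed directly in the ring starting
 * at head h, the add loop below ends up writing a chain like:
 *
 *	desc[h]: addr0/len0, flags = F_NEXT,            next = extra[h].next
 *	desc[a]: addr1/len1, flags = F_NEXT,            next = extra[a].next
 *	desc[b]: addr2/len2, flags = F_WRITE (no NEXT)
 *
 * extra[] mirrors addr/len/flags so the buffers can later be unmapped without
 * reading the descriptor ring back.
 */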
598 
599 static inline int virtqueue_add_split(struct vring_virtqueue *vq,
600 				      struct scatterlist *sgs[],
601 				      unsigned int total_sg,
602 				      unsigned int out_sgs,
603 				      unsigned int in_sgs,
604 				      void *data,
605 				      void *ctx,
606 				      bool premapped,
607 				      gfp_t gfp,
608 				      unsigned long attr)
609 {
610 	struct vring_desc_extra *extra;
611 	struct scatterlist *sg;
612 	struct vring_desc *desc;
613 	unsigned int i, n, avail, descs_used, err_idx, sg_count = 0;
614 	/* Total length for in-order */
615 	unsigned int total_in_len = 0;
616 	int head;
617 	bool indirect;
618 
619 	START_USE(vq);
620 
621 	BUG_ON(data == NULL);
622 	BUG_ON(ctx && vq->indirect);
623 
624 	if (unlikely(vq->broken)) {
625 		END_USE(vq);
626 		return -EIO;
627 	}
628 
629 	LAST_ADD_TIME_UPDATE(vq);
630 
631 	BUG_ON(total_sg == 0);
632 
633 	head = vq->free_head;
634 
635 	if (virtqueue_use_indirect(vq, total_sg))
636 		desc = alloc_indirect_split(vq, total_sg, gfp);
637 	else {
638 		desc = NULL;
639 		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
640 	}
641 
642 	if (desc) {
643 		/* Use a single buffer which doesn't continue */
644 		indirect = true;
645 		/* Set up rest to use this indirect table. */
646 		i = 0;
647 		descs_used = 1;
648 		extra = (struct vring_desc_extra *)&desc[total_sg];
649 	} else {
650 		indirect = false;
651 		desc = vq->split.vring.desc;
652 		extra = vq->split.desc_extra;
653 		i = head;
654 		descs_used = total_sg;
655 	}
656 
657 	if (unlikely(vq->vq.num_free < descs_used)) {
658 		pr_debug("Can't add buf len %i - avail = %i\n",
659 			 descs_used, vq->vq.num_free);
660 		/* FIXME: for historical reasons, we force a notify here if
661 		 * there are outgoing parts to the buffer.  Presumably the
662 		 * host should service the ring ASAP. */
663 		if (out_sgs)
664 			vq->notify(&vq->vq);
665 		if (indirect)
666 			kfree(desc);
667 		END_USE(vq);
668 		return -ENOSPC;
669 	}
670 
671 	for (n = 0; n < out_sgs; n++) {
672 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
673 			dma_addr_t addr;
674 			u32 len;
675 			u16 flags = 0;
676 
677 			if (++sg_count != total_sg)
678 				flags |= VRING_DESC_F_NEXT;
679 
680 			if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len,
681 					     premapped, attr))
682 				goto unmap_release;
683 
684 			/* Note that we trust indirect descriptor
685 			 * table since it uses stream DMA mapping.
686 			 */
687 			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
688 						     len, flags, premapped);
689 		}
690 	}
691 	for (; n < (out_sgs + in_sgs); n++) {
692 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
693 			dma_addr_t addr;
694 			u32 len;
695 			u16 flags = VRING_DESC_F_WRITE;
696 
697 			if (++sg_count != total_sg)
698 				flags |= VRING_DESC_F_NEXT;
699 
700 			if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len,
701 					     premapped, attr))
702 				goto unmap_release;
703 
704 			/* Note that we trust indirect descriptor
705 			 * table since it uses stream DMA mapping.
706 			 */
707 			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
708 						     len, flags, premapped);
709 			total_in_len += len;
710 		}
711 	}
712 
713 	if (indirect) {
714 		/* Now that the indirect table is filled in, map it. */
715 		dma_addr_t addr = vring_map_single(
716 			vq, desc, total_sg * sizeof(struct vring_desc),
717 			DMA_TO_DEVICE);
718 		if (vring_mapping_error(vq, addr))
719 			goto unmap_release;
720 
721 		virtqueue_add_desc_split(vq, vq->split.vring.desc,
722 					 vq->split.desc_extra,
723 					 head, addr,
724 					 total_sg * sizeof(struct vring_desc),
725 					 VRING_DESC_F_INDIRECT, false);
726 	}
727 
728 	/* We're using some buffers from the free list. */
729 	vq->vq.num_free -= descs_used;
730 
731 	/* Update free pointer */
732 	if (virtqueue_is_in_order(vq)) {
733 		vq->free_head += descs_used;
734 		if (vq->free_head >= vq->split.vring.num)
735 			vq->free_head -= vq->split.vring.num;
736 		vq->split.desc_state[head].total_in_len = total_in_len;
737 	} else if (indirect)
738 		vq->free_head = vq->split.desc_extra[head].next;
739 	else
740 		vq->free_head = i;
741 
742 	/* Store token and indirect buffer state. */
743 	vq->split.desc_state[head].data = data;
744 	if (indirect)
745 		vq->split.desc_state[head].indir_desc = desc;
746 	else
747 		vq->split.desc_state[head].indir_desc = ctx;
748 
749 	/* Put entry in available array (but don't update avail->idx until they
750 	 * do sync). */
751 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
752 	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(vq->vq.vdev, head);
753 
754 	/* Descriptors and available array need to be set before we expose the
755 	 * new available array entries. */
756 	virtio_wmb(vq->weak_barriers);
757 	vq->split.avail_idx_shadow++;
758 	vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev,
759 						vq->split.avail_idx_shadow);
760 	vq->num_added++;
761 
762 	pr_debug("Added buffer head %i to %p\n", head, vq);
763 	END_USE(vq);
764 
765 	/* This is very unlikely, but theoretically possible.  Kick
766 	 * just in case. */
767 	if (unlikely(vq->num_added == (1 << 16) - 1))
768 		virtqueue_kick(&vq->vq);
769 
770 	return 0;
771 
772 unmap_release:
773 	err_idx = i;
774 
775 	if (indirect)
776 		i = 0;
777 	else
778 		i = head;
779 
780 	for (n = 0; n < total_sg; n++) {
781 		if (i == err_idx)
782 			break;
783 
784 		i = vring_unmap_one_split(vq, &extra[i]);
785 	}
786 
787 	if (indirect)
788 		kfree(desc);
789 
790 	END_USE(vq);
791 	return -ENOMEM;
792 }
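/*
 * Illustrative driver-side sketch (not part of the original file): drivers do
 * not call virtqueue_add_split() directly; they use the public wrappers such
 * as virtqueue_add_outbuf()/virtqueue_add_sgs() (defined later in this file),
 * which dispatch here for split-ring queues. Roughly:
 *
 *	struct scatterlist sg;
 *	int err;
 *
 *	sg_init_one(&sg, buf, len);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(vq);
 *
 * The token passed as "data" (here "buf") is what virtqueue_get_buf() hands
 * back once the device marks the buffer as used.
 */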
793 
794 static bool virtqueue_kick_prepare_split(struct vring_virtqueue *vq)
795 {
796 	u16 new, old;
797 	bool needs_kick;
798 
799 	START_USE(vq);
800 	/* We need to expose available array entries before checking avail
801 	 * event. */
802 	virtio_mb(vq->weak_barriers);
803 
804 	old = vq->split.avail_idx_shadow - vq->num_added;
805 	new = vq->split.avail_idx_shadow;
806 	vq->num_added = 0;
807 
808 	LAST_ADD_TIME_CHECK(vq);
809 	LAST_ADD_TIME_INVALID(vq);
810 
811 	if (vq->event) {
812 		needs_kick = vring_need_event(virtio16_to_cpu(vq->vq.vdev,
813 					vring_avail_event(&vq->split.vring)),
814 					      new, old);
815 	} else {
816 		needs_kick = !(vq->split.vring.used->flags &
817 					cpu_to_virtio16(vq->vq.vdev,
818 						VRING_USED_F_NO_NOTIFY));
819 	}
820 	END_USE(vq);
821 	return needs_kick;
822 }
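/*
 * Worked example (not part of the original file): vring_need_event(event, new,
 * old) is (u16)(new - event - 1) < (u16)(new - old), i.e. "did avail->idx move
 * past the event index the device asked to be kicked at?". With old = 10 and
 * new = 13 (three buffers added since the last kick):
 *
 *	event = 11:  (13 - 11 - 1) = 1, which is < 3      ->  kick
 *	event = 14:  (13 - 14 - 1) = 65534, not < 3       ->  no kick needed yet
 *
 * The unsigned 16-bit arithmetic keeps the comparison correct across index
 * wrap-around.
 */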
823 
824 static void detach_indirect_split(struct vring_virtqueue *vq,
825 				  unsigned int head)
826 {
827 	struct vring_desc_extra *extra = vq->split.desc_extra;
828 	struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc;
829 	unsigned int j;
830 	u32 len, num;
831 
832 	/* Free the indirect table, if any, now that it's unmapped. */
833 	if (!indir_desc)
834 		return;
835 	len = vq->split.desc_extra[head].len;
836 
837 	BUG_ON(!(vq->split.desc_extra[head].flags &
838 			VRING_DESC_F_INDIRECT));
839 	BUG_ON(len == 0 || len % sizeof(struct vring_desc));
840 
841 	num = len / sizeof(struct vring_desc);
842 
843 	extra = (struct vring_desc_extra *)&indir_desc[num];
844 
845 	if (vq->use_map_api) {
846 		for (j = 0; j < num; j++)
847 			vring_unmap_one_split(vq, &extra[j]);
848 	}
849 
850 	kfree(indir_desc);
851 	vq->split.desc_state[head].indir_desc = NULL;
852 }
853 
854 static unsigned detach_buf_split_in_order(struct vring_virtqueue *vq,
855 					  unsigned int head,
856 					  void **ctx)
857 {
858 	struct vring_desc_extra *extra;
859 	unsigned int i;
860 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
861 
862 	/* Clear data ptr. */
863 	vq->split.desc_state[head].data = NULL;
864 
865 	extra = vq->split.desc_extra;
866 
867 	/* Put back on free list: unmap first-level descriptors and find end */
868 	i = head;
869 
870 	while (vq->split.vring.desc[i].flags & nextflag) {
871 		i = vring_unmap_one_split(vq, &extra[i]);
872 		vq->vq.num_free++;
873 	}
874 
875 	vring_unmap_one_split(vq, &extra[i]);
876 
877 	/* Plus final descriptor */
878 	vq->vq.num_free++;
879 
880 	if (vq->indirect)
881 		detach_indirect_split(vq, head);
882 	else if (ctx)
883 		*ctx = vq->split.desc_state[head].indir_desc;
884 
885 	return i;
886 }
887 
888 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
889 			     void **ctx)
890 {
891 	unsigned int i = detach_buf_split_in_order(vq, head, ctx);
892 
893 	vq->split.desc_extra[i].next = vq->free_head;
894 	vq->free_head = head;
895 }
896 
897 static bool virtqueue_poll_split(const struct vring_virtqueue *vq,
898 				 unsigned int last_used_idx)
899 {
900 	return (u16)last_used_idx != virtio16_to_cpu(vq->vq.vdev,
901 			vq->split.vring.used->idx);
902 }
903 
904 static bool more_used_split(const struct vring_virtqueue *vq)
905 {
906 	return virtqueue_poll_split(vq, vq->last_used_idx);
907 }
908 
909 static bool more_used_split_in_order(const struct vring_virtqueue *vq)
910 {
911 	if (vq->batch_last.id != UINT_MAX)
912 		return true;
913 
914 	return virtqueue_poll_split(vq, vq->last_used_idx);
915 }
916 
917 static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq,
918 					 unsigned int *len,
919 					 void **ctx)
920 {
921 	void *ret;
922 	unsigned int i;
923 	u16 last_used;
924 
925 	START_USE(vq);
926 
927 	if (unlikely(vq->broken)) {
928 		END_USE(vq);
929 		return NULL;
930 	}
931 
932 	if (!more_used_split(vq)) {
933 		pr_debug("No more buffers in queue\n");
934 		END_USE(vq);
935 		return NULL;
936 	}
937 
938 	/* Only get used array entries after they have been exposed by host. */
939 	virtio_rmb(vq->weak_barriers);
940 
941 	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
942 	i = virtio32_to_cpu(vq->vq.vdev,
943 			vq->split.vring.used->ring[last_used].id);
944 	*len = virtio32_to_cpu(vq->vq.vdev,
945 			vq->split.vring.used->ring[last_used].len);
946 
947 	if (unlikely(i >= vq->split.vring.num)) {
948 		BAD_RING(vq, "id %u out of range\n", i);
949 		return NULL;
950 	}
951 	if (unlikely(!vq->split.desc_state[i].data)) {
952 		BAD_RING(vq, "id %u is not a head!\n", i);
953 		return NULL;
954 	}
955 
956 	/* detach_buf_split clears data, so grab it now. */
957 	ret = vq->split.desc_state[i].data;
958 	detach_buf_split(vq, i, ctx);
959 	vq->last_used_idx++;
960 	/* If we expect an interrupt for the next entry, tell host
961 	 * by writing event index and flush out the write before
962 	 * the read in the next get_buf call. */
963 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
964 		virtio_store_mb(vq->weak_barriers,
965 				&vring_used_event(&vq->split.vring),
966 				cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx));
967 
968 	LAST_ADD_TIME_INVALID(vq);
969 
970 	END_USE(vq);
971 	return ret;
972 }
973 
974 static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq,
975 						  unsigned int *len,
976 						  void **ctx)
977 {
978 	void *ret;
979 	unsigned int num = vq->split.vring.num;
980 	unsigned int num_free = vq->vq.num_free;
981 	u16 last_used, last_used_idx;
982 
983 	START_USE(vq);
984 
985 	if (unlikely(vq->broken)) {
986 		END_USE(vq);
987 		return NULL;
988 	}
989 
990 	last_used = vq->last_used & (num - 1);
991 	last_used_idx = vq->last_used_idx & (num - 1);
992 
993 	if (vq->batch_last.id == UINT_MAX) {
994 		if (!more_used_split_in_order(vq)) {
995 			pr_debug("No more buffers in queue\n");
996 			END_USE(vq);
997 			return NULL;
998 		}
999 
1000 		/*
1001 		 * Only get used array entries after they have been
1002 		 * exposed by host.
1003 		 */
1004 		virtio_rmb(vq->weak_barriers);
1005 
1006 		vq->batch_last.id = virtio32_to_cpu(vq->vq.vdev,
1007 				    vq->split.vring.used->ring[last_used_idx].id);
1008 		vq->batch_last.len = virtio32_to_cpu(vq->vq.vdev,
1009 				     vq->split.vring.used->ring[last_used_idx].len);
1010 	}
1011 
1012 	if (vq->batch_last.id == last_used) {
1013 		vq->batch_last.id = UINT_MAX;
1014 		*len = vq->batch_last.len;
1015 	} else {
1016 		*len = vq->split.desc_state[last_used].total_in_len;
1017 	}
1018 
1019 	if (unlikely(!vq->split.desc_state[last_used].data)) {
1020 		BAD_RING(vq, "id %u is not a head!\n", last_used);
1021 		return NULL;
1022 	}
1023 
1024 	/* detach_buf_split clears data, so grab it now. */
1025 	ret = vq->split.desc_state[last_used].data;
1026 	detach_buf_split_in_order(vq, last_used, ctx);
1027 
1028 	vq->last_used_idx++;
1029 	vq->last_used += (vq->vq.num_free - num_free);
1030 	/* If we expect an interrupt for the next entry, tell host
1031 	 * by writing event index and flush out the write before
1032 	 * the read in the next get_buf call. */
1033 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
1034 		virtio_store_mb(vq->weak_barriers,
1035 				&vring_used_event(&vq->split.vring),
1036 				cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx));
1037 
1038 	LAST_ADD_TIME_INVALID(vq);
1039 
1040 	END_USE(vq);
1041 	return ret;
1042 }
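/*
 * Illustrative note (not part of the original file): with VIRTIO_F_IN_ORDER
 * the device may write a single used entry that covers a whole batch of
 * buffers. Its id is cached in vq->batch_last; buffers detached before that
 * id report the total_in_len recorded at add time, the cached entry itself
 * supplies the device-written length, and batch_last.id is then reset to
 * UINT_MAX until the next batch is seen.
 */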
1043 
1044 static void virtqueue_disable_cb_split(struct vring_virtqueue *vq)
1045 {
1046 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
1047 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1048 
1049 		/*
1050 		 * If device triggered an event already it won't trigger one again:
1051 		 * no need to disable.
1052 		 */
1053 		if (vq->event_triggered)
1054 			return;
1055 
1056 		if (vq->event)
1057 			/* TODO: this is a hack. Figure out a cleaner value to write. */
1058 			vring_used_event(&vq->split.vring) = 0x0;
1059 		else
1060 			vq->split.vring.avail->flags =
1061 				cpu_to_virtio16(vq->vq.vdev,
1062 						vq->split.avail_flags_shadow);
1063 	}
1064 }
1065 
1066 static unsigned int virtqueue_enable_cb_prepare_split(struct vring_virtqueue *vq)
1067 {
1068 	u16 last_used_idx;
1069 
1070 	START_USE(vq);
1071 
1072 	/* We optimistically turn back on interrupts, then check if there was
1073 	 * more to do. */
1074 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
1075 	 * either clear the flags bit or point the event index at the next
1076 	 * entry. Always do both to keep code simple. */
1077 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
1078 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
1079 		if (!vq->event)
1080 			vq->split.vring.avail->flags =
1081 				cpu_to_virtio16(vq->vq.vdev,
1082 						vq->split.avail_flags_shadow);
1083 	}
1084 	vring_used_event(&vq->split.vring) = cpu_to_virtio16(vq->vq.vdev,
1085 			last_used_idx = vq->last_used_idx);
1086 	END_USE(vq);
1087 	return last_used_idx;
1088 }
1089 
1090 static bool virtqueue_enable_cb_delayed_split(struct vring_virtqueue *vq)
1091 {
1092 	u16 bufs;
1093 
1094 	START_USE(vq);
1095 
1096 	/* We optimistically turn back on interrupts, then check if there was
1097 	 * more to do. */
1098 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
1099 	 * either clear the flags bit or point the event index at the next
1100 	 * entry. Always update the event index to keep code simple. */
1101 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
1102 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
1103 		if (!vq->event)
1104 			vq->split.vring.avail->flags =
1105 				cpu_to_virtio16(vq->vq.vdev,
1106 						vq->split.avail_flags_shadow);
1107 	}
1108 	/* TODO: tune this threshold */
1109 	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
1110 
1111 	virtio_store_mb(vq->weak_barriers,
1112 			&vring_used_event(&vq->split.vring),
1113 			cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx + bufs));
1114 
1115 	if (unlikely((u16)(virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx)
1116 					- vq->last_used_idx) > bufs)) {
1117 		END_USE(vq);
1118 		return false;
1119 	}
1120 
1121 	END_USE(vq);
1122 	return true;
1123 }
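/*
 * Worked example (not part of the original file): with 8 buffers still
 * outstanding (avail_idx_shadow - last_used_idx == 8), bufs = 8 * 3 / 4 = 6,
 * so the used event index is set to last_used_idx + 6 and the device holds
 * off its interrupt until roughly three quarters of the outstanding buffers
 * have been consumed, batching completions.
 */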
1124 
1125 static void *virtqueue_detach_unused_buf_split(struct vring_virtqueue *vq)
1126 {
1127 	unsigned int i;
1128 	void *buf;
1129 
1130 	START_USE(vq);
1131 
1132 	for (i = 0; i < vq->split.vring.num; i++) {
1133 		if (!vq->split.desc_state[i].data)
1134 			continue;
1135 		/* detach_buf_split clears data, so grab it now. */
1136 		buf = vq->split.desc_state[i].data;
1137 		if (virtqueue_is_in_order(vq))
1138 			detach_buf_split_in_order(vq, i, NULL);
1139 		else
1140 			detach_buf_split(vq, i, NULL);
1141 		vq->split.avail_idx_shadow--;
1142 		vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev,
1143 				vq->split.avail_idx_shadow);
1144 		END_USE(vq);
1145 		return buf;
1146 	}
1147 	/* That should have freed everything. */
1148 	BUG_ON(vq->vq.num_free != vq->split.vring.num);
1149 
1150 	END_USE(vq);
1151 	return NULL;
1152 }
1153 
1154 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
1155 				       struct vring_virtqueue *vq)
1156 {
1157 	struct virtio_device *vdev;
1158 
1159 	vdev = vq->vq.vdev;
1160 
1161 	vring_split->avail_flags_shadow = 0;
1162 	vring_split->avail_idx_shadow = 0;
1163 
1164 	/* No callback?  Tell other side not to bother us. */
1165 	if (!vq->vq.callback) {
1166 		vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1167 		if (!vq->event)
1168 			vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1169 					vring_split->avail_flags_shadow);
1170 	}
1171 }
1172 
1173 static void virtqueue_reset_split(struct vring_virtqueue *vq)
1174 {
1175 	int num;
1176 
1177 	num = vq->split.vring.num;
1178 
1179 	vq->split.vring.avail->flags = 0;
1180 	vq->split.vring.avail->idx = 0;
1181 
1182 	/* reset avail event */
1183 	vq->split.vring.avail->ring[num] = 0;
1184 
1185 	vq->split.vring.used->flags = 0;
1186 	vq->split.vring.used->idx = 0;
1187 
1188 	/* reset used event */
1189 	*(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1190 
1191 	virtqueue_init(vq, num);
1192 
1193 	virtqueue_vring_init_split(&vq->split, vq);
1194 }
1195 
1196 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1197 					 struct vring_virtqueue_split *vring_split)
1198 {
1199 	vq->split = *vring_split;
1200 
1201 	/* Put everything in free lists. */
1202 	vq->free_head = 0;
1203 	vq->batch_last.id = UINT_MAX;
1204 }
1205 
1206 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1207 {
1208 	struct vring_desc_state_split *state;
1209 	struct vring_desc_extra *extra;
1210 	u32 num = vring_split->vring.num;
1211 
1212 	state = kmalloc_objs(struct vring_desc_state_split, num, GFP_KERNEL);
1213 	if (!state)
1214 		goto err_state;
1215 
1216 	extra = vring_alloc_desc_extra(num);
1217 	if (!extra)
1218 		goto err_extra;
1219 
1220 	memset(state, 0, num * sizeof(struct vring_desc_state_split));
1221 
1222 	vring_split->desc_state = state;
1223 	vring_split->desc_extra = extra;
1224 	return 0;
1225 
1226 err_extra:
1227 	kfree(state);
1228 err_state:
1229 	return -ENOMEM;
1230 }
1231 
1232 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1233 			     struct virtio_device *vdev,
1234 			     union virtio_map map)
1235 {
1236 	vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1237 			 vring_split->vring.desc,
1238 			 vring_split->queue_dma_addr,
1239 			 map);
1240 
1241 	kfree(vring_split->desc_state);
1242 	kfree(vring_split->desc_extra);
1243 }
1244 
1245 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1246 				   struct virtio_device *vdev,
1247 				   u32 num,
1248 				   unsigned int vring_align,
1249 				   bool may_reduce_num,
1250 				   union virtio_map map)
1251 {
1252 	void *queue = NULL;
1253 	dma_addr_t dma_addr;
1254 
1255 	/* We assume num is a power of 2. */
1256 	if (!is_power_of_2(num)) {
1257 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1258 		return -EINVAL;
1259 	}
1260 
1261 	/* TODO: allocate each queue chunk individually */
1262 	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1263 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1264 					  &dma_addr,
1265 					  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1266 					  map);
1267 		if (queue)
1268 			break;
1269 		if (!may_reduce_num)
1270 			return -ENOMEM;
1271 	}
1272 
1273 	if (!num)
1274 		return -ENOMEM;
1275 
1276 	if (!queue) {
1277 		/* Try to get a single page. You are my only hope! */
1278 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1279 					  &dma_addr, GFP_KERNEL | __GFP_ZERO,
1280 					  map);
1281 	}
1282 	if (!queue)
1283 		return -ENOMEM;
1284 
1285 	vring_init(&vring_split->vring, num, queue, vring_align);
1286 
1287 	vring_split->queue_dma_addr = dma_addr;
1288 	vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1289 
1290 	vring_split->vring_align = vring_align;
1291 	vring_split->may_reduce_num = may_reduce_num;
1292 
1293 	return 0;
1294 }
1295 
1296 static const struct virtqueue_ops split_ops;
1297 
1298 static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
1299 					       struct vring_virtqueue_split *vring_split,
1300 					       struct virtio_device *vdev,
1301 					       bool weak_barriers,
1302 					       bool context,
1303 					       bool (*notify)(struct virtqueue *),
1304 					       void (*callback)(struct virtqueue *),
1305 					       const char *name,
1306 					       union virtio_map map)
1307 {
1308 	struct vring_virtqueue *vq;
1309 	int err;
1310 
1311 	vq = kmalloc_obj(*vq, GFP_KERNEL);
1312 	if (!vq)
1313 		return NULL;
1314 
1315 	vq->vq.callback = callback;
1316 	vq->vq.vdev = vdev;
1317 	vq->vq.name = name;
1318 	vq->vq.index = index;
1319 	vq->vq.reset = false;
1320 	vq->we_own_ring = false;
1321 	vq->notify = notify;
1322 	vq->weak_barriers = weak_barriers;
1323 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
1324 	vq->broken = true;
1325 #else
1326 	vq->broken = false;
1327 #endif
1328 	vq->map = map;
1329 	vq->use_map_api = vring_use_map_api(vdev);
1330 
1331 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1332 		!context;
1333 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1334 	vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ?
1335 		     VQ_LAYOUT_SPLIT_IN_ORDER : VQ_LAYOUT_SPLIT;
1336 
1337 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1338 		vq->weak_barriers = false;
1339 
1340 	err = vring_alloc_state_extra_split(vring_split);
1341 	if (err) {
1342 		kfree(vq);
1343 		return NULL;
1344 	}
1345 
1346 	virtqueue_vring_init_split(vring_split, vq);
1347 
1348 	virtqueue_init(vq, vring_split->vring.num);
1349 	virtqueue_vring_attach_split(vq, vring_split);
1350 
1351 	spin_lock(&vdev->vqs_list_lock);
1352 	list_add_tail(&vq->vq.list, &vdev->vqs);
1353 	spin_unlock(&vdev->vqs_list_lock);
1354 	return &vq->vq;
1355 }
1356 
1357 static struct virtqueue *vring_create_virtqueue_split(
1358 	unsigned int index,
1359 	unsigned int num,
1360 	unsigned int vring_align,
1361 	struct virtio_device *vdev,
1362 	bool weak_barriers,
1363 	bool may_reduce_num,
1364 	bool context,
1365 	bool (*notify)(struct virtqueue *),
1366 	void (*callback)(struct virtqueue *),
1367 	const char *name,
1368 	union virtio_map map)
1369 {
1370 	struct vring_virtqueue_split vring_split = {};
1371 	struct virtqueue *vq;
1372 	int err;
1373 
1374 	err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1375 				      may_reduce_num, map);
1376 	if (err)
1377 		return NULL;
1378 
1379 	vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
1380 				   context, notify, callback, name, map);
1381 	if (!vq) {
1382 		vring_free_split(&vring_split, vdev, map);
1383 		return NULL;
1384 	}
1385 
1386 	to_vvq(vq)->we_own_ring = true;
1387 
1388 	return vq;
1389 }
1390 
1391 static int virtqueue_resize_split(struct vring_virtqueue *vq, u32 num)
1392 {
1393 	struct vring_virtqueue_split vring_split = {};
1394 	struct virtio_device *vdev = vq->vq.vdev;
1395 	int err;
1396 
1397 	err = vring_alloc_queue_split(&vring_split, vdev, num,
1398 				      vq->split.vring_align,
1399 				      vq->split.may_reduce_num,
1400 				      vq->map);
1401 	if (err)
1402 		goto err;
1403 
1404 	err = vring_alloc_state_extra_split(&vring_split);
1405 	if (err)
1406 		goto err_state_extra;
1407 
1408 	vring_free(&vq->vq);
1409 
1410 	virtqueue_vring_init_split(&vring_split, vq);
1411 
1412 	virtqueue_init(vq, vring_split.vring.num);
1413 	virtqueue_vring_attach_split(vq, &vring_split);
1414 
1415 	return 0;
1416 
1417 err_state_extra:
1418 	vring_free_split(&vring_split, vdev, vq->map);
1419 err:
1420 	virtqueue_reset_split(vq);
1421 	return -ENOMEM;
1422 }
1423 
1424 
1425 /*
1426  * Packed ring specific functions - *_packed().
1427  */
1428 static bool packed_used_wrap_counter(u16 last_used_idx)
1429 {
1430 	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1431 }
1432 
1433 static u16 packed_last_used(u16 last_used_idx)
1434 {
1435 	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1436 }
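/*
 * Worked example (not part of the original file): VRING_PACKED_EVENT_F_WRAP_CTR
 * is 15, so bit 15 of last_used_idx holds the used wrap counter and bits 14..0
 * hold the index itself. For last_used_idx = 0x8005:
 *
 *	packed_used_wrap_counter(0x8005) -> true (wrap counter 1)
 *	packed_last_used(0x8005)         -> 5
 */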
1437 
1438 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1439 				     const struct vring_desc_extra *extra)
1440 {
1441 	u16 flags;
1442 
1443 	flags = extra->flags;
1444 
1445 	if (flags & VRING_DESC_F_INDIRECT) {
1446 		if (!vq->use_map_api)
1447 			return;
1448 	} else if (!vring_need_unmap_buffer(vq, extra))
1449 		return;
1450 
1451 	virtqueue_unmap_page_attrs(&vq->vq,
1452 				   extra->addr, extra->len,
1453 				   (flags & VRING_DESC_F_WRITE) ?
1454 				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
1455 				   0);
1456 }
1457 
1458 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1459 						       gfp_t gfp)
1460 {
1461 	struct vring_desc_extra *extra;
1462 	struct vring_packed_desc *desc;
1463 	int i, size;
1464 
1465 	/*
1466 	 * We require lowmem mappings for the descriptors because
1467 	 * otherwise virt_to_phys will give us bogus addresses in the
1468 	 * virtqueue.
1469 	 */
1470 	gfp &= ~__GFP_HIGHMEM;
1471 
1472 	size = (sizeof(*desc) + sizeof(*extra)) * total_sg;
1473 
1474 	desc = kmalloc(size, gfp);
1475 	if (!desc)
1476 		return NULL;
1477 
1478 	extra = (struct vring_desc_extra *)&desc[total_sg];
1479 
1480 	for (i = 0; i < total_sg; i++)
1481 		extra[i].next = i + 1;
1482 
1483 	return desc;
1484 }
1485 
1486 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1487 					 struct scatterlist *sgs[],
1488 					 unsigned int total_sg,
1489 					 unsigned int out_sgs,
1490 					 unsigned int in_sgs,
1491 					 void *data,
1492 					 bool premapped,
1493 					 gfp_t gfp,
1494 					 u16 id,
1495 					 unsigned long attr)
1496 {
1497 	struct vring_desc_extra *extra;
1498 	struct vring_packed_desc *desc;
1499 	struct scatterlist *sg;
1500 	unsigned int i, n, err_idx, len, total_in_len = 0;
1501 	u16 head;
1502 	dma_addr_t addr;
1503 
1504 	head = vq->packed.next_avail_idx;
1505 	desc = alloc_indirect_packed(total_sg, gfp);
1506 	if (!desc)
1507 		return -ENOMEM;
1508 
1509 	extra = (struct vring_desc_extra *)&desc[total_sg];
1510 
1511 	if (unlikely(vq->vq.num_free < 1)) {
1512 		pr_debug("Can't add buf len 1 - avail = 0\n");
1513 		kfree(desc);
1514 		END_USE(vq);
1515 		return -ENOSPC;
1516 	}
1517 
1518 	i = 0;
1519 
1520 	for (n = 0; n < out_sgs + in_sgs; n++) {
1521 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1522 			if (vring_map_one_sg(vq, sg, n < out_sgs ?
1523 					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
1524 					     &addr, &len, premapped, attr))
1525 				goto unmap_release;
1526 
1527 			desc[i].flags = cpu_to_le16(n < out_sgs ?
1528 						0 : VRING_DESC_F_WRITE);
1529 			desc[i].addr = cpu_to_le64(addr);
1530 			desc[i].len = cpu_to_le32(len);
1531 
1532 			if (unlikely(vq->use_map_api)) {
1533 				extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
1534 				extra[i].len = len;
1535 				extra[i].flags = n < out_sgs ?  0 : VRING_DESC_F_WRITE;
1536 			}
1537 
1538 			if (n >= out_sgs)
1539 				total_in_len += len;
1540 			i++;
1541 		}
1542 	}
1543 
1544 	/* Now that the indirect table is filled in, map it. */
1545 	addr = vring_map_single(vq, desc,
1546 			total_sg * sizeof(struct vring_packed_desc),
1547 			DMA_TO_DEVICE);
1548 	if (vring_mapping_error(vq, addr))
1549 		goto unmap_release;
1550 
1551 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1552 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1553 				sizeof(struct vring_packed_desc));
1554 	vq->packed.vring.desc[head].id = cpu_to_le16(id);
1555 
1556 	if (vq->use_map_api) {
1557 		vq->packed.desc_extra[id].addr = addr;
1558 		vq->packed.desc_extra[id].len = total_sg *
1559 				sizeof(struct vring_packed_desc);
1560 		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1561 						  vq->packed.avail_used_flags;
1562 	}
1563 
1564 	/*
1565 	 * A driver MUST NOT make the first descriptor in the list
1566 	 * available before all subsequent descriptors comprising
1567 	 * the list are made available.
1568 	 */
1569 	virtio_wmb(vq->weak_barriers);
1570 	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1571 						vq->packed.avail_used_flags);
1572 
1573 	/* We're using some buffers from the free list. */
1574 	vq->vq.num_free -= 1;
1575 
1576 	/* Update free pointer */
1577 	n = head + 1;
1578 	if (n >= vq->packed.vring.num) {
1579 		n = 0;
1580 		vq->packed.avail_wrap_counter ^= 1;
1581 		vq->packed.avail_used_flags ^=
1582 				1 << VRING_PACKED_DESC_F_AVAIL |
1583 				1 << VRING_PACKED_DESC_F_USED;
1584 	}
1585 	vq->packed.next_avail_idx = n;
1586 	if (!virtqueue_is_in_order(vq))
1587 		vq->free_head = vq->packed.desc_extra[id].next;
1588 
1589 	/* Store token and indirect buffer state. */
1590 	vq->packed.desc_state[id].num = 1;
1591 	vq->packed.desc_state[id].data = data;
1592 	vq->packed.desc_state[id].indir_desc = desc;
1593 	vq->packed.desc_state[id].last = id;
1594 	vq->packed.desc_state[id].total_in_len = total_in_len;
1595 
1596 	vq->num_added += 1;
1597 
1598 	pr_debug("Added buffer head %i to %p\n", head, vq);
1599 	END_USE(vq);
1600 
1601 	return 0;
1602 
1603 unmap_release:
1604 	err_idx = i;
1605 
1606 	for (i = 0; i < err_idx; i++)
1607 		vring_unmap_extra_packed(vq, &extra[i]);
1608 
1609 	kfree(desc);
1610 
1611 	END_USE(vq);
1612 	return -ENOMEM;
1613 }
1614 
1615 static inline int virtqueue_add_packed(struct vring_virtqueue *vq,
1616 				       struct scatterlist *sgs[],
1617 				       unsigned int total_sg,
1618 				       unsigned int out_sgs,
1619 				       unsigned int in_sgs,
1620 				       void *data,
1621 				       void *ctx,
1622 				       bool premapped,
1623 				       gfp_t gfp,
1624 				       unsigned long attr)
1625 {
1626 	struct vring_packed_desc *desc;
1627 	struct scatterlist *sg;
1628 	unsigned int i, n, c, descs_used, err_idx, len;
1629 	__le16 head_flags, flags;
1630 	u16 head, id, prev, curr, avail_used_flags;
1631 	int err;
1632 
1633 	START_USE(vq);
1634 
1635 	BUG_ON(data == NULL);
1636 	BUG_ON(ctx && vq->indirect);
1637 
1638 	if (unlikely(vq->broken)) {
1639 		END_USE(vq);
1640 		return -EIO;
1641 	}
1642 
1643 	LAST_ADD_TIME_UPDATE(vq);
1644 
1645 	BUG_ON(total_sg == 0);
1646 
1647 	if (virtqueue_use_indirect(vq, total_sg)) {
1648 		id = vq->free_head;
1649 		BUG_ON(id == vq->packed.vring.num);
1650 		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1651 						    in_sgs, data, premapped, gfp,
1652 						    id, attr);
1653 		if (err != -ENOMEM) {
1654 			END_USE(vq);
1655 			return err;
1656 		}
1657 
1658 		/* fall back on direct */
1659 	}
1660 
1661 	head = vq->packed.next_avail_idx;
1662 	avail_used_flags = vq->packed.avail_used_flags;
1663 
1664 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1665 
1666 	desc = vq->packed.vring.desc;
1667 	i = head;
1668 	descs_used = total_sg;
1669 
1670 	if (unlikely(vq->vq.num_free < descs_used)) {
1671 		pr_debug("Can't add buf len %i - avail = %i\n",
1672 			 descs_used, vq->vq.num_free);
1673 		END_USE(vq);
1674 		return -ENOSPC;
1675 	}
1676 
1677 	id = vq->free_head;
1678 	BUG_ON(id == vq->packed.vring.num);
1679 
1680 	curr = id;
1681 	c = 0;
1682 	for (n = 0; n < out_sgs + in_sgs; n++) {
1683 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1684 			dma_addr_t addr;
1685 
1686 			if (vring_map_one_sg(vq, sg, n < out_sgs ?
1687 					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
1688 					     &addr, &len, premapped, attr))
1689 				goto unmap_release;
1690 
1691 			flags = cpu_to_le16(vq->packed.avail_used_flags |
1692 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1693 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1694 			if (i == head)
1695 				head_flags = flags;
1696 			else
1697 				desc[i].flags = flags;
1698 
1699 			desc[i].addr = cpu_to_le64(addr);
1700 			desc[i].len = cpu_to_le32(len);
1701 			desc[i].id = cpu_to_le16(id);
1702 
1703 			if (unlikely(vq->use_map_api)) {
1704 				vq->packed.desc_extra[curr].addr = premapped ?
1705 					DMA_MAPPING_ERROR : addr;
1706 				vq->packed.desc_extra[curr].len = len;
1707 				vq->packed.desc_extra[curr].flags =
1708 					le16_to_cpu(flags);
1709 			}
1710 			prev = curr;
1711 			curr = vq->packed.desc_extra[curr].next;
1712 
1713 			if ((unlikely(++i >= vq->packed.vring.num))) {
1714 				i = 0;
1715 				vq->packed.avail_used_flags ^=
1716 					1 << VRING_PACKED_DESC_F_AVAIL |
1717 					1 << VRING_PACKED_DESC_F_USED;
1718 			}
1719 		}
1720 	}
1721 
1722 	if (i <= head)
1723 		vq->packed.avail_wrap_counter ^= 1;
1724 
1725 	/* We're using some buffers from the free list. */
1726 	vq->vq.num_free -= descs_used;
1727 
1728 	/* Update free pointer */
1729 	vq->packed.next_avail_idx = i;
1730 	vq->free_head = curr;
1731 
1732 	/* Store token. */
1733 	vq->packed.desc_state[id].num = descs_used;
1734 	vq->packed.desc_state[id].data = data;
1735 	vq->packed.desc_state[id].indir_desc = ctx;
1736 	vq->packed.desc_state[id].last = prev;
1737 
1738 	/*
1739 	 * A driver MUST NOT make the first descriptor in the list
1740 	 * available before all subsequent descriptors comprising
1741 	 * the list are made available.
1742 	 */
1743 	virtio_wmb(vq->weak_barriers);
1744 	vq->packed.vring.desc[head].flags = head_flags;
1745 	vq->num_added += descs_used;
1746 
1747 	pr_debug("Added buffer head %i to %p\n", head, vq);
1748 	END_USE(vq);
1749 
1750 	return 0;
1751 
1752 unmap_release:
1753 	err_idx = i;
1754 	i = head;
1755 	curr = vq->free_head;
1756 
1757 	vq->packed.avail_used_flags = avail_used_flags;
1758 
1759 	for (n = 0; n < total_sg; n++) {
1760 		if (i == err_idx)
1761 			break;
1762 		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1763 		curr = vq->packed.desc_extra[curr].next;
1764 		i++;
1765 		if (i >= vq->packed.vring.num)
1766 			i = 0;
1767 	}
1768 
1769 	END_USE(vq);
1770 	return -EIO;
1771 }
1772 
1773 static inline int virtqueue_add_packed_in_order(struct vring_virtqueue *vq,
1774 						struct scatterlist *sgs[],
1775 						unsigned int total_sg,
1776 						unsigned int out_sgs,
1777 						unsigned int in_sgs,
1778 						void *data,
1779 						void *ctx,
1780 						bool premapped,
1781 						gfp_t gfp,
1782 						unsigned long attr)
1783 {
1784 	struct vring_packed_desc *desc;
1785 	struct scatterlist *sg;
1786 	unsigned int i, n, sg_count, err_idx, total_in_len = 0;
1787 	__le16 head_flags, flags;
1788 	u16 head, avail_used_flags;
1789 	bool avail_wrap_counter;
1790 	int err;
1791 
1792 	START_USE(vq);
1793 
1794 	BUG_ON(data == NULL);
1795 	BUG_ON(ctx && vq->indirect);
1796 
1797 	if (unlikely(vq->broken)) {
1798 		END_USE(vq);
1799 		return -EIO;
1800 	}
1801 
1802 	LAST_ADD_TIME_UPDATE(vq);
1803 
1804 	BUG_ON(total_sg == 0);
1805 
1806 	if (virtqueue_use_indirect(vq, total_sg)) {
1807 		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1808 						    in_sgs, data, premapped, gfp,
1809 						    vq->packed.next_avail_idx,
1810 						    attr);
1811 		if (err != -ENOMEM) {
1812 			END_USE(vq);
1813 			return err;
1814 		}
1815 
1816 		/* fall back on direct */
1817 	}
1818 
1819 	head = vq->packed.next_avail_idx;
1820 	avail_used_flags = vq->packed.avail_used_flags;
1821 	avail_wrap_counter = vq->packed.avail_wrap_counter;
1822 
1823 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1824 
1825 	desc = vq->packed.vring.desc;
1826 	i = head;
1827 
1828 	if (unlikely(vq->vq.num_free < total_sg)) {
1829 		pr_debug("Can't add buf len %i - avail = %i\n",
1830 			 total_sg, vq->vq.num_free);
1831 		END_USE(vq);
1832 		return -ENOSPC;
1833 	}
1834 
1835 	sg_count = 0;
1836 	for (n = 0; n < out_sgs + in_sgs; n++) {
1837 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1838 			dma_addr_t addr;
1839 			u32 len;
1840 
1841 			flags = 0;
1842 			if (++sg_count != total_sg)
1843 				flags |= cpu_to_le16(VRING_DESC_F_NEXT);
1844 			if (n >= out_sgs)
1845 				flags |= cpu_to_le16(VRING_DESC_F_WRITE);
1846 
1847 			if (vring_map_one_sg(vq, sg, n < out_sgs ?
1848 					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
1849 					     &addr, &len, premapped, attr))
1850 				goto unmap_release;
1851 
1852 			flags |= cpu_to_le16(vq->packed.avail_used_flags);
1853 
1854 			if (i == head)
1855 				head_flags = flags;
1856 			else
1857 				desc[i].flags = flags;
1858 
1859 			desc[i].addr = cpu_to_le64(addr);
1860 			desc[i].len = cpu_to_le32(len);
1861 			desc[i].id = cpu_to_le16(head);
1862 
1863 			if (unlikely(vq->use_map_api)) {
1864 				vq->packed.desc_extra[i].addr = premapped ?
1865 				      DMA_MAPPING_ERROR : addr;
1866 				vq->packed.desc_extra[i].len = len;
1867 				vq->packed.desc_extra[i].flags =
1868 					le16_to_cpu(flags);
1869 			}
1870 
1871 			if ((unlikely(++i >= vq->packed.vring.num))) {
1872 				i = 0;
1873 				vq->packed.avail_used_flags ^=
1874 					1 << VRING_PACKED_DESC_F_AVAIL |
1875 					1 << VRING_PACKED_DESC_F_USED;
1876 				vq->packed.avail_wrap_counter ^= 1;
1877 			}
1878 
1879 			if (n >= out_sgs)
1880 				total_in_len += len;
1881 		}
1882 	}
1883 
1884 	/* We're using some buffers from the free list. */
1885 	vq->vq.num_free -= total_sg;
1886 
1887 	/* Update free pointer */
1888 	vq->packed.next_avail_idx = i;
1889 
1890 	/* Store token. */
1891 	vq->packed.desc_state[head].num = total_sg;
1892 	vq->packed.desc_state[head].data = data;
1893 	vq->packed.desc_state[head].indir_desc = ctx;
1894 	vq->packed.desc_state[head].total_in_len = total_in_len;
1895 
1896 	/*
1897 	 * A driver MUST NOT make the first descriptor in the list
1898 	 * available before all subsequent descriptors comprising
1899 	 * the list are made available.
1900 	 */
1901 	virtio_wmb(vq->weak_barriers);
1902 	vq->packed.vring.desc[head].flags = head_flags;
1903 	vq->num_added += total_sg;
1904 
1905 	pr_debug("Added buffer head %i to %p\n", head, vq);
1906 	END_USE(vq);
1907 
1908 	return 0;
1909 
1910 unmap_release:
1911 	err_idx = i;
1912 	i = head;
1913 	vq->packed.avail_used_flags = avail_used_flags;
1914 	vq->packed.avail_wrap_counter = avail_wrap_counter;
1915 
1916 	for (n = 0; n < total_sg; n++) {
1917 		if (i == err_idx)
1918 			break;
1919 		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[i]);
1920 		i++;
1921 		if (i >= vq->packed.vring.num)
1922 			i = 0;
1923 	}
1924 
1925 	END_USE(vq);
1926 	return -EIO;
1927 }
1928 
1929 static bool virtqueue_kick_prepare_packed(struct vring_virtqueue *vq)
1930 {
1931 	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1932 	bool needs_kick;
1933 	union {
1934 		struct {
1935 			__le16 off_wrap;
1936 			__le16 flags;
1937 		};
1938 		u32 u32;
1939 	} snapshot;
1940 
1941 	START_USE(vq);
1942 
1943 	/*
1944 	 * We need to expose the new flags value before checking notification
1945 	 * suppressions.
1946 	 */
1947 	virtio_mb(vq->weak_barriers);
1948 
1949 	old = vq->packed.next_avail_idx - vq->num_added;
1950 	new = vq->packed.next_avail_idx;
1951 	vq->num_added = 0;
1952 
1953 	snapshot.u32 = *(u32 *)vq->packed.vring.device;
1954 	flags = le16_to_cpu(snapshot.flags);
1955 
1956 	LAST_ADD_TIME_CHECK(vq);
1957 	LAST_ADD_TIME_INVALID(vq);
1958 
1959 	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1960 		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1961 		goto out;
1962 	}
1963 
1964 	off_wrap = le16_to_cpu(snapshot.off_wrap);
1965 
1966 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1967 	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1968 	if (wrap_counter != vq->packed.avail_wrap_counter)
1969 		event_idx -= vq->packed.vring.num;
1970 
1971 	needs_kick = vring_need_event(event_idx, new, old);
1972 out:
1973 	END_USE(vq);
1974 	return needs_kick;
1975 }
1976 
1977 static void detach_buf_packed_in_order(struct vring_virtqueue *vq,
1978 				       unsigned int id, void **ctx)
1979 {
1980 	struct vring_desc_state_packed *state = NULL;
1981 	struct vring_packed_desc *desc;
1982 	unsigned int i, curr;
1983 
1984 	state = &vq->packed.desc_state[id];
1985 
1986 	/* Clear data ptr. */
1987 	state->data = NULL;
1988 
1989 	vq->vq.num_free += state->num;
1990 
1991 	if (unlikely(vq->use_map_api)) {
1992 		curr = id;
1993 		for (i = 0; i < state->num; i++) {
1994 			vring_unmap_extra_packed(vq,
1995 						 &vq->packed.desc_extra[curr]);
1996 			curr = vq->packed.desc_extra[curr].next;
1997 		}
1998 	}
1999 
2000 	if (vq->indirect) {
2001 		struct vring_desc_extra *extra;
2002 		u32 len, num;
2003 
2004 		/* Free the indirect table, if any, now that it's unmapped. */
2005 		desc = state->indir_desc;
2006 		if (!desc)
2007 			return;
2008 
2009 		if (vq->use_map_api) {
2010 			len = vq->packed.desc_extra[id].len;
2011 			num = len / sizeof(struct vring_packed_desc);
2012 
2013 			extra = (struct vring_desc_extra *)&desc[num];
2014 
2015 			for (i = 0; i < num; i++)
2016 				vring_unmap_extra_packed(vq, &extra[i]);
2017 		}
2018 		kfree(desc);
2019 		state->indir_desc = NULL;
2020 	} else if (ctx) {
2021 		*ctx = state->indir_desc;
2022 	}
2023 }
2024 
2025 static void detach_buf_packed(struct vring_virtqueue *vq,
2026 			      unsigned int id, void **ctx)
2027 {
2028 	struct vring_desc_state_packed *state = &vq->packed.desc_state[id];
2029 
2030 	vq->packed.desc_extra[state->last].next = vq->free_head;
2031 	vq->free_head = id;
2032 
2033 	detach_buf_packed_in_order(vq, id, ctx);
2034 }
2035 
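/*
 * Packed ring note: per the packed virtqueue layout, the device marks a
 * descriptor as used by writing both the AVAIL and USED bits equal to its
 * current used wrap counter.  A descriptor at @idx has therefore been used
 * exactly when AVAIL == USED and both match the wrap counter the driver
 * expects, which is what the helper below tests.
 */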
2036 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
2037 				       u16 idx, bool used_wrap_counter)
2038 {
2039 	bool avail, used;
2040 	u16 flags;
2041 
2042 	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
2043 	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
2044 	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
2045 
2046 	return avail == used && used == used_wrap_counter;
2047 }
2048 
2049 static bool virtqueue_poll_packed(const struct vring_virtqueue *vq,
2050 				  unsigned int off_wrap)
2051 {
2052 	bool wrap_counter;
2053 	u16 used_idx;
2054 
2055 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
2056 	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
2057 
2058 	return is_used_desc_packed(vq, used_idx, wrap_counter);
2059 }
2060 
2061 static bool more_used_packed(const struct vring_virtqueue *vq)
2062 {
2063 	return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx));
2064 }
2065 
2066 static void update_last_used_idx_packed(struct vring_virtqueue *vq,
2067 					u16 id, u16 last_used,
2068 					u16 used_wrap_counter)
2069 {
2070 	last_used += vq->packed.desc_state[id].num;
2071 	if (unlikely(last_used >= vq->packed.vring.num)) {
2072 		last_used -= vq->packed.vring.num;
2073 		used_wrap_counter ^= 1;
2074 	}
2075 
2076 	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
2077 	WRITE_ONCE(vq->last_used_idx, last_used);
2078 
2079 	/*
2080 	 * If we expect an interrupt for the next entry, tell host
2081 	 * by writing event index and flush out the write before
2082 	 * the read in the next get_buf call.
2083 	 */
2084 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
2085 		virtio_store_mb(vq->weak_barriers,
2086 				&vq->packed.vring.driver->off_wrap,
2087 				cpu_to_le16(vq->last_used_idx));
2088 }
2089 
2090 static bool more_used_packed_in_order(const struct vring_virtqueue *vq)
2091 {
2092 	if (vq->batch_last.id != UINT_MAX)
2093 		return true;
2094 
2095 	return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx));
2096 }
2097 
2098 static void *virtqueue_get_buf_ctx_packed_in_order(struct vring_virtqueue *vq,
2099 						   unsigned int *len,
2100 						   void **ctx)
2101 {
2102 	unsigned int num = vq->packed.vring.num;
2103 	u16 last_used, last_used_idx;
2104 	bool used_wrap_counter;
2105 	void *ret;
2106 
2107 	START_USE(vq);
2108 
2109 	if (unlikely(vq->broken)) {
2110 		END_USE(vq);
2111 		return NULL;
2112 	}
2113 
2114 	last_used_idx = vq->last_used_idx;
2115 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
2116 	last_used = packed_last_used(last_used_idx);
2117 
2118 	if (vq->batch_last.id == UINT_MAX) {
2119 		if (!more_used_packed_in_order(vq)) {
2120 			pr_debug("No more buffers in queue\n");
2121 			END_USE(vq);
2122 			return NULL;
2123 		}
2124 		/* Only get used elements after they have been exposed by host. */
2125 		virtio_rmb(vq->weak_barriers);
2126 		vq->batch_last.id =
2127 			le16_to_cpu(vq->packed.vring.desc[last_used].id);
2128 		vq->batch_last.len =
2129 			le32_to_cpu(vq->packed.vring.desc[last_used].len);
2130 	}
2131 
2132 	if (vq->batch_last.id == last_used) {
2133 		vq->batch_last.id = UINT_MAX;
2134 		*len = vq->batch_last.len;
2135 	} else {
2136 		*len = vq->packed.desc_state[last_used].total_in_len;
2137 	}
2138 
2139 	if (unlikely(last_used >= num)) {
2140 		BAD_RING(vq, "id %u out of range\n", last_used);
2141 		return NULL;
2142 	}
2143 	if (unlikely(!vq->packed.desc_state[last_used].data)) {
2144 		BAD_RING(vq, "id %u is not a head!\n", last_used);
2145 		return NULL;
2146 	}
2147 
2148 	/* detach_buf_packed_in_order clears data, so grab it now. */
2149 	ret = vq->packed.desc_state[last_used].data;
2150 	detach_buf_packed_in_order(vq, last_used, ctx);
2151 
2152 	update_last_used_idx_packed(vq, last_used, last_used,
2153 				    used_wrap_counter);
2154 
2155 	LAST_ADD_TIME_INVALID(vq);
2156 
2157 	END_USE(vq);
2158 	return ret;
2159 }
2160 
2161 static void *virtqueue_get_buf_ctx_packed(struct vring_virtqueue *vq,
2162 					  unsigned int *len,
2163 					  void **ctx)
2164 {
2165 	unsigned int num = vq->packed.vring.num;
2166 	u16 last_used, id, last_used_idx;
2167 	bool used_wrap_counter;
2168 	void *ret;
2169 
2170 	START_USE(vq);
2171 
2172 	if (unlikely(vq->broken)) {
2173 		END_USE(vq);
2174 		return NULL;
2175 	}
2176 
2177 	if (!more_used_packed(vq)) {
2178 		pr_debug("No more buffers in queue\n");
2179 		END_USE(vq);
2180 		return NULL;
2181 	}
2182 
2183 	/* Only get used elements after they have been exposed by host. */
2184 	virtio_rmb(vq->weak_barriers);
2185 
2186 	last_used_idx = READ_ONCE(vq->last_used_idx);
2187 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
2188 	last_used = packed_last_used(last_used_idx);
2189 	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
2190 	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
2191 
2192 	if (unlikely(id >= num)) {
2193 		BAD_RING(vq, "id %u out of range\n", id);
2194 		return NULL;
2195 	}
2196 	if (unlikely(!vq->packed.desc_state[id].data)) {
2197 		BAD_RING(vq, "id %u is not a head!\n", id);
2198 		return NULL;
2199 	}
2200 
2201 	/* detach_buf_packed clears data, so grab it now. */
2202 	ret = vq->packed.desc_state[id].data;
2203 	detach_buf_packed(vq, id, ctx);
2204 
2205 	update_last_used_idx_packed(vq, id, last_used, used_wrap_counter);
2206 
2207 	LAST_ADD_TIME_INVALID(vq);
2208 
2209 	END_USE(vq);
2210 	return ret;
2211 }
2212 
2213 static void virtqueue_disable_cb_packed(struct vring_virtqueue *vq)
2214 {
2215 	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
2216 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2217 
2218 		/*
2219 		 * If device triggered an event already it won't trigger one again:
2220 		 * no need to disable.
2221 		 */
2222 		if (vq->event_triggered)
2223 			return;
2224 
2225 		vq->packed.vring.driver->flags =
2226 			cpu_to_le16(vq->packed.event_flags_shadow);
2227 	}
2228 }
2229 
2230 static unsigned int virtqueue_enable_cb_prepare_packed(struct vring_virtqueue *vq)
2231 {
2232 	START_USE(vq);
2233 
2234 	/*
2235 	 * We optimistically turn back on interrupts, then check if there was
2236 	 * more to do.
2237 	 */
2238 
2239 	if (vq->event) {
2240 		vq->packed.vring.driver->off_wrap =
2241 			cpu_to_le16(vq->last_used_idx);
2242 		/*
2243 		 * We need to update event offset and event wrap
2244 		 * counter first before updating event flags.
2245 		 */
2246 		virtio_wmb(vq->weak_barriers);
2247 	}
2248 
2249 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
2250 		vq->packed.event_flags_shadow = vq->event ?
2251 				VRING_PACKED_EVENT_FLAG_DESC :
2252 				VRING_PACKED_EVENT_FLAG_ENABLE;
2253 		vq->packed.vring.driver->flags =
2254 				cpu_to_le16(vq->packed.event_flags_shadow);
2255 	}
2256 
2257 	END_USE(vq);
2258 	return vq->last_used_idx;
2259 }
2260 
2261 static bool virtqueue_enable_cb_delayed_packed(struct vring_virtqueue *vq)
2262 {
2263 	u16 used_idx, wrap_counter, last_used_idx;
2264 	u16 bufs;
2265 
2266 	START_USE(vq);
2267 
2268 	/*
2269 	 * We optimistically turn back on interrupts, then check if there was
2270 	 * more to do.
2271 	 */
2272 
2273 	if (vq->event) {
2274 		/* TODO: tune this threshold */
2275 		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
2276 		last_used_idx = READ_ONCE(vq->last_used_idx);
2277 		wrap_counter = packed_used_wrap_counter(last_used_idx);
2278 
2279 		used_idx = packed_last_used(last_used_idx) + bufs;
2280 		if (used_idx >= vq->packed.vring.num) {
2281 			used_idx -= vq->packed.vring.num;
2282 			wrap_counter ^= 1;
2283 		}
2284 
2285 		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
2286 			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
2287 
2288 		/*
2289 		 * We need to update event offset and event wrap
2290 		 * counter first before updating event flags.
2291 		 */
2292 		virtio_wmb(vq->weak_barriers);
2293 	}
2294 
2295 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
2296 		vq->packed.event_flags_shadow = vq->event ?
2297 				VRING_PACKED_EVENT_FLAG_DESC :
2298 				VRING_PACKED_EVENT_FLAG_ENABLE;
2299 		vq->packed.vring.driver->flags =
2300 				cpu_to_le16(vq->packed.event_flags_shadow);
2301 	}
2302 
2303 	/*
2304 	 * We need to update event suppression structure first
2305 	 * before re-checking for more used buffers.
2306 	 */
2307 	virtio_mb(vq->weak_barriers);
2308 
2309 	last_used_idx = READ_ONCE(vq->last_used_idx);
2310 	wrap_counter = packed_used_wrap_counter(last_used_idx);
2311 	used_idx = packed_last_used(last_used_idx);
2312 	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
2313 		END_USE(vq);
2314 		return false;
2315 	}
2316 
2317 	END_USE(vq);
2318 	return true;
2319 }
2320 
2321 static void *virtqueue_detach_unused_buf_packed(struct vring_virtqueue *vq)
2322 {
2323 	unsigned int i;
2324 	void *buf;
2325 
2326 	START_USE(vq);
2327 
2328 	for (i = 0; i < vq->packed.vring.num; i++) {
2329 		if (!vq->packed.desc_state[i].data)
2330 			continue;
2331 		/* detach_buf clears data, so grab it now. */
2332 		buf = vq->packed.desc_state[i].data;
2333 		if (virtqueue_is_in_order(vq))
2334 			detach_buf_packed_in_order(vq, i, NULL);
2335 		else
2336 			detach_buf_packed(vq, i, NULL);
2337 		END_USE(vq);
2338 		return buf;
2339 	}
2340 	/* That should have freed everything. */
2341 	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
2342 
2343 	END_USE(vq);
2344 	return NULL;
2345 }
2346 
2347 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
2348 {
2349 	struct vring_desc_extra *desc_extra;
2350 	unsigned int i;
2351 
2352 	desc_extra = kmalloc_objs(struct vring_desc_extra, num, GFP_KERNEL);
2353 	if (!desc_extra)
2354 		return NULL;
2355 
2356 	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
2357 
2358 	for (i = 0; i < num - 1; i++)
2359 		desc_extra[i].next = i + 1;
2360 
2361 	desc_extra[num - 1].next = 0;
2362 
2363 	return desc_extra;
2364 }
2365 
2366 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
2367 			      struct virtio_device *vdev,
2368 			      union virtio_map map)
2369 {
2370 	if (vring_packed->vring.desc)
2371 		vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
2372 				 vring_packed->vring.desc,
2373 				 vring_packed->ring_dma_addr,
2374 				 map);
2375 
2376 	if (vring_packed->vring.driver)
2377 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
2378 				 vring_packed->vring.driver,
2379 				 vring_packed->driver_event_dma_addr,
2380 				 map);
2381 
2382 	if (vring_packed->vring.device)
2383 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
2384 				 vring_packed->vring.device,
2385 				 vring_packed->device_event_dma_addr,
2386 				 map);
2387 
2388 	kfree(vring_packed->desc_state);
2389 	kfree(vring_packed->desc_extra);
2390 }
2391 
2392 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
2393 				    struct virtio_device *vdev,
2394 				    u32 num, union virtio_map map)
2395 {
2396 	struct vring_packed_desc *ring;
2397 	struct vring_packed_desc_event *driver, *device;
2398 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
2399 	size_t ring_size_in_bytes, event_size_in_bytes;
2400 
2401 	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
2402 
2403 	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
2404 				 &ring_dma_addr,
2405 				 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2406 				 map);
2407 	if (!ring)
2408 		goto err;
2409 
2410 	vring_packed->vring.desc         = ring;
2411 	vring_packed->ring_dma_addr      = ring_dma_addr;
2412 	vring_packed->ring_size_in_bytes = ring_size_in_bytes;
2413 
2414 	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
2415 
2416 	driver = vring_alloc_queue(vdev, event_size_in_bytes,
2417 				   &driver_event_dma_addr,
2418 				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2419 				   map);
2420 	if (!driver)
2421 		goto err;
2422 
2423 	vring_packed->vring.driver          = driver;
2424 	vring_packed->event_size_in_bytes   = event_size_in_bytes;
2425 	vring_packed->driver_event_dma_addr = driver_event_dma_addr;
2426 
2427 	device = vring_alloc_queue(vdev, event_size_in_bytes,
2428 				   &device_event_dma_addr,
2429 				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2430 				   map);
2431 	if (!device)
2432 		goto err;
2433 
2434 	vring_packed->vring.device          = device;
2435 	vring_packed->device_event_dma_addr = device_event_dma_addr;
2436 
2437 	vring_packed->vring.num = num;
2438 
2439 	return 0;
2440 
2441 err:
2442 	vring_free_packed(vring_packed, vdev, map);
2443 	return -ENOMEM;
2444 }
2445 
2446 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
2447 {
2448 	struct vring_desc_state_packed *state;
2449 	struct vring_desc_extra *extra;
2450 	u32 num = vring_packed->vring.num;
2451 
2452 	state = kmalloc_objs(struct vring_desc_state_packed, num, GFP_KERNEL);
2453 	if (!state)
2454 		goto err_desc_state;
2455 
2456 	memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2457 
2458 	extra = vring_alloc_desc_extra(num);
2459 	if (!extra)
2460 		goto err_desc_extra;
2461 
2462 	vring_packed->desc_state = state;
2463 	vring_packed->desc_extra = extra;
2464 
2465 	return 0;
2466 
2467 err_desc_extra:
2468 	kfree(state);
2469 err_desc_state:
2470 	return -ENOMEM;
2471 }
2472 
2473 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2474 					bool callback)
2475 {
2476 	vring_packed->next_avail_idx = 0;
2477 	vring_packed->avail_wrap_counter = 1;
2478 	vring_packed->event_flags_shadow = 0;
2479 	vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2480 
2481 	/* No callback?  Tell other side not to bother us. */
2482 	if (!callback) {
2483 		vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2484 		vring_packed->vring.driver->flags =
2485 			cpu_to_le16(vring_packed->event_flags_shadow);
2486 	}
2487 }
2488 
2489 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2490 					  struct vring_virtqueue_packed *vring_packed)
2491 {
2492 	vq->packed = *vring_packed;
2493 
2494 	if (virtqueue_is_in_order(vq)) {
2495 		vq->batch_last.id = UINT_MAX;
2496 	} else {
2497 		/*
2498 		 * Put everything in free lists. Note that
2499 		 * next_avail_idx is sufficient with IN_ORDER so
2500 		 * free_head is unused.
2501 		 */
2502 		vq->free_head = 0;
2503 	}
2504 }
2505 static void virtqueue_reset_packed(struct vring_virtqueue *vq)
2506 {
2507 	memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
2508 	memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
2509 
2510 	/* We need to reset desc.flags; for details, see is_used_desc_packed(). */
2511 	memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
2512 	virtqueue_init(vq, vq->packed.vring.num);
2513 	virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
2514 }
2515 
2516 static const struct virtqueue_ops packed_ops;
2517 
2518 static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
2519 					       struct vring_virtqueue_packed *vring_packed,
2520 					       struct virtio_device *vdev,
2521 					       bool weak_barriers,
2522 					       bool context,
2523 					       bool (*notify)(struct virtqueue *),
2524 					       void (*callback)(struct virtqueue *),
2525 					       const char *name,
2526 					       union virtio_map map)
2527 {
2528 	struct vring_virtqueue *vq;
2529 	int err;
2530 
2531 	vq = kmalloc_obj(*vq, GFP_KERNEL);
2532 	if (!vq)
2533 		return NULL;
2534 
2535 	vq->vq.callback = callback;
2536 	vq->vq.vdev = vdev;
2537 	vq->vq.name = name;
2538 	vq->vq.index = index;
2539 	vq->vq.reset = false;
2540 	vq->we_own_ring = false;
2541 	vq->notify = notify;
2542 	vq->weak_barriers = weak_barriers;
2543 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2544 	vq->broken = true;
2545 #else
2546 	vq->broken = false;
2547 #endif
2548 	vq->map = map;
2549 	vq->use_map_api = vring_use_map_api(vdev);
2550 
2551 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2552 		!context;
2553 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2554 	vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ?
2555 		     VQ_LAYOUT_PACKED_IN_ORDER : VQ_LAYOUT_PACKED;
2556 
2557 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2558 		vq->weak_barriers = false;
2559 
2560 	err = vring_alloc_state_extra_packed(vring_packed);
2561 	if (err) {
2562 		kfree(vq);
2563 		return NULL;
2564 	}
2565 
2566 	virtqueue_vring_init_packed(vring_packed, !!callback);
2567 
2568 	virtqueue_init(vq, vring_packed->vring.num);
2569 	virtqueue_vring_attach_packed(vq, vring_packed);
2570 
2571 	spin_lock(&vdev->vqs_list_lock);
2572 	list_add_tail(&vq->vq.list, &vdev->vqs);
2573 	spin_unlock(&vdev->vqs_list_lock);
2574 	return &vq->vq;
2575 }
2576 
2577 static struct virtqueue *vring_create_virtqueue_packed(
2578 	unsigned int index,
2579 	unsigned int num,
2580 	unsigned int vring_align,
2581 	struct virtio_device *vdev,
2582 	bool weak_barriers,
2583 	bool may_reduce_num,
2584 	bool context,
2585 	bool (*notify)(struct virtqueue *),
2586 	void (*callback)(struct virtqueue *),
2587 	const char *name,
2588 	union virtio_map map)
2589 {
2590 	struct vring_virtqueue_packed vring_packed = {};
2591 	struct virtqueue *vq;
2592 
2593 	if (vring_alloc_queue_packed(&vring_packed, vdev, num, map))
2594 		return NULL;
2595 
2596 	vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers,
2597 					context, notify, callback, name, map);
2598 	if (!vq) {
2599 		vring_free_packed(&vring_packed, vdev, map);
2600 		return NULL;
2601 	}
2602 
2603 	to_vvq(vq)->we_own_ring = true;
2604 
2605 	return vq;
2606 }
2607 
2608 static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num)
2609 {
2610 	struct vring_virtqueue_packed vring_packed = {};
2611 	struct virtio_device *vdev = vq->vq.vdev;
2612 	int err;
2613 
2614 	if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map))
2615 		goto err_ring;
2616 
2617 	err = vring_alloc_state_extra_packed(&vring_packed);
2618 	if (err)
2619 		goto err_state_extra;
2620 
2621 	vring_free(&vq->vq);
2622 
2623 	virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2624 
2625 	virtqueue_init(vq, vring_packed.vring.num);
2626 	virtqueue_vring_attach_packed(vq, &vring_packed);
2627 
2628 	return 0;
2629 
2630 err_state_extra:
2631 	vring_free_packed(&vring_packed, vdev, vq->map);
2632 err_ring:
2633 	virtqueue_reset_packed(vq);
2634 	return -ENOMEM;
2635 }
2636 
2637 static const struct virtqueue_ops split_ops = {
2638 	.add = virtqueue_add_split,
2639 	.get = virtqueue_get_buf_ctx_split,
2640 	.kick_prepare = virtqueue_kick_prepare_split,
2641 	.disable_cb = virtqueue_disable_cb_split,
2642 	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
2643 	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
2644 	.poll = virtqueue_poll_split,
2645 	.detach_unused_buf = virtqueue_detach_unused_buf_split,
2646 	.more_used = more_used_split,
2647 	.resize = virtqueue_resize_split,
2648 	.reset = virtqueue_reset_split,
2649 };
2650 
2651 static const struct virtqueue_ops packed_ops = {
2652 	.add = virtqueue_add_packed,
2653 	.get = virtqueue_get_buf_ctx_packed,
2654 	.kick_prepare = virtqueue_kick_prepare_packed,
2655 	.disable_cb = virtqueue_disable_cb_packed,
2656 	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
2657 	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
2658 	.poll = virtqueue_poll_packed,
2659 	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
2660 	.more_used = more_used_packed,
2661 	.resize = virtqueue_resize_packed,
2662 	.reset = virtqueue_reset_packed,
2663 };
2664 
2665 static const struct virtqueue_ops split_in_order_ops = {
2666 	.add = virtqueue_add_split,
2667 	.get = virtqueue_get_buf_ctx_split_in_order,
2668 	.kick_prepare = virtqueue_kick_prepare_split,
2669 	.disable_cb = virtqueue_disable_cb_split,
2670 	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
2671 	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
2672 	.poll = virtqueue_poll_split,
2673 	.detach_unused_buf = virtqueue_detach_unused_buf_split,
2674 	.more_used = more_used_split_in_order,
2675 	.resize = virtqueue_resize_split,
2676 	.reset = virtqueue_reset_split,
2677 };
2678 
2679 static const struct virtqueue_ops packed_in_order_ops = {
2680 	.add = virtqueue_add_packed_in_order,
2681 	.get = virtqueue_get_buf_ctx_packed_in_order,
2682 	.kick_prepare = virtqueue_kick_prepare_packed,
2683 	.disable_cb = virtqueue_disable_cb_packed,
2684 	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
2685 	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
2686 	.poll = virtqueue_poll_packed,
2687 	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
2688 	.more_used = more_used_packed_in_order,
2689 	.resize = virtqueue_resize_packed,
2690 	.reset = virtqueue_reset_packed,
2691 };
2692 
2693 static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2694 					 void (*recycle)(struct virtqueue *vq, void *buf))
2695 {
2696 	struct vring_virtqueue *vq = to_vvq(_vq);
2697 	struct virtio_device *vdev = vq->vq.vdev;
2698 	void *buf;
2699 	int err;
2700 
2701 	if (!vq->we_own_ring)
2702 		return -EPERM;
2703 
2704 	if (!vdev->config->disable_vq_and_reset)
2705 		return -ENOENT;
2706 
2707 	if (!vdev->config->enable_vq_after_reset)
2708 		return -ENOENT;
2709 
2710 	err = vdev->config->disable_vq_and_reset(_vq);
2711 	if (err)
2712 		return err;
2713 
2714 	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2715 		recycle(_vq, buf);
2716 
2717 	return 0;
2718 }
2719 
2720 static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2721 {
2722 	struct vring_virtqueue *vq = to_vvq(_vq);
2723 	struct virtio_device *vdev = vq->vq.vdev;
2724 
2725 	if (vdev->config->enable_vq_after_reset(_vq))
2726 		return -EBUSY;
2727 
2728 	return 0;
2729 }
2730 
2731 /*
2732  * Generic functions and exported symbols.
2733  */
2734 
2735 #define VIRTQUEUE_CALL(vq, op, ...)					\
2736 	({								\
2737 	typeof(vq) __VIRTQUEUE_CALL_vq = (vq);				\
2738 	typeof(split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__)) ret;	\
2739 									\
2740 	switch (__VIRTQUEUE_CALL_vq->layout) {				\
2741 	case VQ_LAYOUT_SPLIT:						\
2742 		ret = split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);	\
2743 		break;							\
2744 	case VQ_LAYOUT_PACKED:						\
2745 		ret = packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);\
2746 		break;							\
2747 	case VQ_LAYOUT_SPLIT_IN_ORDER:					\
2748 		ret = split_in_order_ops.op(vq, ##__VA_ARGS__);		\
2749 		break;							\
2750 	case VQ_LAYOUT_PACKED_IN_ORDER:					\
2751 		ret = packed_in_order_ops.op(vq, ##__VA_ARGS__);	\
2752 		break;							\
2753 	default:							\
2754 		BUG();							\
2755 		break;							\
2756 	}								\
2757 	ret;								\
2758 })
2759 
2760 #define VOID_VIRTQUEUE_CALL(vq, op, ...)				\
2761 	({								\
2762 	typeof(vq) __VIRTQUEUE_CALL_vq = (vq);				\
2763 									\
2764 	switch (__VIRTQUEUE_CALL_vq->layout) {				\
2765 	case VQ_LAYOUT_SPLIT:						\
2766 		split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);	\
2767 		break;							\
2768 	case VQ_LAYOUT_PACKED:						\
2769 		packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);	\
2770 		break;							\
2771 	case VQ_LAYOUT_SPLIT_IN_ORDER:					\
2772 		split_in_order_ops.op(vq, ##__VA_ARGS__);		\
2773 		break;							\
2774 	case VQ_LAYOUT_PACKED_IN_ORDER:					\
2775 		packed_in_order_ops.op(vq, ##__VA_ARGS__);		\
2776 		break;							\
2777 	default:							\
2778 		BUG();							\
2779 		break;							\
2780 	}								\
2781 })
2782 
2783 static inline int virtqueue_add(struct virtqueue *_vq,
2784 				struct scatterlist *sgs[],
2785 				unsigned int total_sg,
2786 				unsigned int out_sgs,
2787 				unsigned int in_sgs,
2788 				void *data,
2789 				void *ctx,
2790 				bool premapped,
2791 				gfp_t gfp,
2792 				unsigned long attr)
2793 {
2794 	struct vring_virtqueue *vq = to_vvq(_vq);
2795 
2796 	return VIRTQUEUE_CALL(vq, add, sgs, total_sg,
2797 			      out_sgs, in_sgs, data,
2798 			      ctx, premapped, gfp, attr);
2799 }
2800 
2801 /**
2802  * virtqueue_add_sgs - expose buffers to other end
2803  * @_vq: the struct virtqueue we're talking about.
2804  * @sgs: array of terminated scatterlists.
2805  * @out_sgs: the number of scatterlists readable by other side
2806  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2807  * @data: the token identifying the buffer.
2808  * @gfp: how to do memory allocations (if necessary).
2809  *
2810  * Caller must ensure we don't call this with other virtqueue operations
2811  * at the same time (except where noted).
2812  *
2813  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2814  *
2815  * NB: ENOSPC is a special code that is only returned on an attempt to add a
2816  * buffer to a full VQ. It indicates that some buffers are outstanding and that
2817  * the operation can be retried after some buffers have been used.
2818  */
2819 int virtqueue_add_sgs(struct virtqueue *_vq,
2820 		      struct scatterlist *sgs[],
2821 		      unsigned int out_sgs,
2822 		      unsigned int in_sgs,
2823 		      void *data,
2824 		      gfp_t gfp)
2825 {
2826 	unsigned int i, total_sg = 0;
2827 
2828 	/* Count them first. */
2829 	for (i = 0; i < out_sgs + in_sgs; i++) {
2830 		struct scatterlist *sg;
2831 
2832 		for (sg = sgs[i]; sg; sg = sg_next(sg))
2833 			total_sg++;
2834 	}
2835 	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2836 			     data, NULL, false, gfp, 0);
2837 }
2838 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
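/*
 * Minimal usage sketch for virtqueue_add_sgs(); struct my_req, req and vq
 * are assumptions made for the example, not part of this file.  One
 * device-readable sg is followed by one device-writable sg:
 *
 *	struct scatterlist hdr, status, *sgs[2];
 *	int err;
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	sgs[0] = &hdr;
 *	sgs[1] = &status;
 *
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (err)
 *		return err;
 *	virtqueue_kick(vq);
 *
 * A return value of -ENOSPC only means the ring is currently full; the
 * request can be retried once some used buffers have been reclaimed.  The
 * token (req) is handed back by virtqueue_get_buf() when the device is done.
 */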
2839 
2840 /**
2841  * virtqueue_add_outbuf - expose output buffers to other end
2842  * @vq: the struct virtqueue we're talking about.
2843  * @sg: scatterlist (must be well-formed and terminated!)
2844  * @num: the number of entries in @sg readable by other side
2845  * @data: the token identifying the buffer.
2846  * @gfp: how to do memory allocations (if necessary).
2847  *
2848  * Caller must ensure we don't call this with other virtqueue operations
2849  * at the same time (except where noted).
2850  *
2851  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2852  */
2853 int virtqueue_add_outbuf(struct virtqueue *vq,
2854 			 struct scatterlist *sg, unsigned int num,
2855 			 void *data,
2856 			 gfp_t gfp)
2857 {
2858 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp, 0);
2859 }
2860 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2861 
2862 /**
2863  * virtqueue_add_outbuf_premapped - expose output buffers to other end
2864  * @vq: the struct virtqueue we're talking about.
2865  * @sg: scatterlist (must be well-formed and terminated!)
2866  * @num: the number of entries in @sg readable by other side
2867  * @data: the token identifying the buffer.
2868  * @gfp: how to do memory allocations (if necessary).
2869  *
2870  * Caller must ensure we don't call this with other virtqueue operations
2871  * at the same time (except where noted).
2872  *
2873  * Return:
2874  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2875  */
2876 int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
2877 				   struct scatterlist *sg, unsigned int num,
2878 				   void *data,
2879 				   gfp_t gfp)
2880 {
2881 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp, 0);
2882 }
2883 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
2884 
2885 /**
2886  * virtqueue_add_inbuf - expose input buffers to other end
2887  * @vq: the struct virtqueue we're talking about.
2888  * @sg: scatterlist (must be well-formed and terminated!)
2889  * @num: the number of entries in @sg writable by other side
2890  * @data: the token identifying the buffer.
2891  * @gfp: how to do memory allocations (if necessary).
2892  *
2893  * Caller must ensure we don't call this with other virtqueue operations
2894  * at the same time (except where noted).
2895  *
2896  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2897  */
2898 int virtqueue_add_inbuf(struct virtqueue *vq,
2899 			struct scatterlist *sg, unsigned int num,
2900 			void *data,
2901 			gfp_t gfp)
2902 {
2903 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 0);
2904 }
2905 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
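/*
 * Typical receive-side pattern (sketch; the buffer size and the vq pointer
 * are assumptions made for illustration):
 *
 *	struct scatterlist sg;
 *	void *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 *	int err;
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	sg_init_one(&sg, buf, PAGE_SIZE);
 *	err = virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
 *	if (err) {
 *		kfree(buf);
 *		return err;
 *	}
 *	virtqueue_kick(vq);
 *
 * The same token (buf) is later returned by virtqueue_get_buf() together
 * with the number of bytes the device wrote into it.
 */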
2906 
2907 /**
2908  * virtqueue_add_inbuf_cache_clean - expose input buffers with cache clean
2909  * @vq: the struct virtqueue we're talking about.
2910  * @sg: scatterlist (must be well-formed and terminated!)
2911  * @num: the number of entries in @sg writable by other side
2912  * @data: the token identifying the buffer.
2913  * @gfp: how to do memory allocations (if necessary).
2914  *
2915  * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate
2916  * that the CPU will not dirty any cacheline overlapping this buffer while it
2917  * is available, and to suppress overlapping cacheline warnings in DMA debug
2918  * builds.
2919  *
2920  * Caller must ensure we don't call this with other virtqueue operations
2921  * at the same time (except where noted).
2922  *
2923  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2924  */
2925 int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
2926 				    struct scatterlist *sg, unsigned int num,
2927 				    void *data,
2928 				    gfp_t gfp)
2929 {
2930 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
2931 			     DMA_ATTR_CPU_CACHE_CLEAN);
2932 }
2933 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
2934 
2935 /**
2936  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2937  * @vq: the struct virtqueue we're talking about.
2938  * @sg: scatterlist (must be well-formed and terminated!)
2939  * @num: the number of entries in @sg writable by other side
2940  * @data: the token identifying the buffer.
2941  * @ctx: extra context for the token
2942  * @gfp: how to do memory allocations (if necessary).
2943  *
2944  * Caller must ensure we don't call this with other virtqueue operations
2945  * at the same time (except where noted).
2946  *
2947  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2948  */
2949 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2950 			struct scatterlist *sg, unsigned int num,
2951 			void *data,
2952 			void *ctx,
2953 			gfp_t gfp)
2954 {
2955 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp, 0);
2956 }
2957 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2958 
2959 /**
2960  * virtqueue_add_inbuf_premapped - expose input buffers to other end
2961  * @vq: the struct virtqueue we're talking about.
2962  * @sg: scatterlist (must be well-formed and terminated!)
2963  * @num: the number of entries in @sg writable by other side
2964  * @data: the token identifying the buffer.
2965  * @ctx: extra context for the token
2966  * @gfp: how to do memory allocations (if necessary).
2967  *
2968  * Caller must ensure we don't call this with other virtqueue operations
2969  * at the same time (except where noted).
2970  *
2971  * Return:
2972  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2973  */
2974 int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
2975 				  struct scatterlist *sg, unsigned int num,
2976 				  void *data,
2977 				  void *ctx,
2978 				  gfp_t gfp)
2979 {
2980 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp, 0);
2981 }
2982 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
2983 
2984 /**
2985  * virtqueue_dma_dev - get the dma dev
2986  * @_vq: the struct virtqueue we're talking about.
2987  *
2988  * Returns the dma dev. That can been used for dma api.
2989  */
2990 struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2991 {
2992 	struct vring_virtqueue *vq = to_vvq(_vq);
2993 
2994 	if (vq->use_map_api && !_vq->vdev->map)
2995 		return vq->map.dma_dev;
2996 	else
2997 		return NULL;
2998 }
2999 EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
3000 
3001 /**
3002  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
3003  * @_vq: the struct virtqueue
3004  *
3005  * Instead of virtqueue_kick(), you can do:
3006  *	if (virtqueue_kick_prepare(vq))
3007  *		virtqueue_notify(vq);
3008  *
3009  * This is sometimes useful because virtqueue_kick_prepare() needs
3010  * to be serialized, but the actual virtqueue_notify() call does not.
3011  */
3012 bool virtqueue_kick_prepare(struct virtqueue *_vq)
3013 {
3014 	struct vring_virtqueue *vq = to_vvq(_vq);
3015 
3016 	return VIRTQUEUE_CALL(vq, kick_prepare);
3017 }
3018 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
3019 
3020 /**
3021  * virtqueue_notify - second half of split virtqueue_kick call.
3022  * @_vq: the struct virtqueue
3023  *
3024  * This does not need to be serialized.
3025  *
3026  * Returns false if host notify failed or queue is broken, otherwise true.
3027  */
3028 bool virtqueue_notify(struct virtqueue *_vq)
3029 {
3030 	struct vring_virtqueue *vq = to_vvq(_vq);
3031 
3032 	if (unlikely(vq->broken))
3033 		return false;
3034 
3035 	/* Prod other side to tell it about changes. */
3036 	if (!vq->notify(_vq)) {
3037 		vq->broken = true;
3038 		return false;
3039 	}
3040 	return true;
3041 }
3042 EXPORT_SYMBOL_GPL(virtqueue_notify);
3043 
3044 /**
3045  * virtqueue_kick - update after add_buf
3046  * @vq: the struct virtqueue
3047  *
3048  * After one or more virtqueue_add_* calls, invoke this to kick
3049  * the other side.
3050  *
3051  * Caller must ensure we don't call this with other virtqueue
3052  * operations at the same time (except where noted).
3053  *
3054  * Returns false if kick failed, otherwise true.
3055  */
3056 bool virtqueue_kick(struct virtqueue *vq)
3057 {
3058 	if (virtqueue_kick_prepare(vq))
3059 		return virtqueue_notify(vq);
3060 	return true;
3061 }
3062 EXPORT_SYMBOL_GPL(virtqueue_kick);
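/*
 * Several buffers may be queued before a single kick, and the kick can be
 * split so that only the serialized half runs under the caller's lock
 * (sketch; priv->lock and the queued buffers are assumptions made for the
 * example):
 *
 *	unsigned long flags;
 *	bool kick;
 *
 *	spin_lock_irqsave(&priv->lock, flags);
 *	... one or more virtqueue_add_* calls ...
 *	kick = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&priv->lock, flags);
 *
 *	if (kick)
 *		virtqueue_notify(vq);
 *
 * virtqueue_notify() needs no serialization, so it can safely run after
 * the lock has been dropped.
 */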
3063 
3064 /**
3065  * virtqueue_get_buf_ctx - get the next used buffer
3066  * @_vq: the struct virtqueue we're talking about.
3067  * @len: the length written into the buffer
3068  * @ctx: extra context for the token
3069  *
3070  * If the device wrote data into the buffer, @len will be set to the
3071  * amount written.  This means you don't need to clear the buffer
3072  * beforehand to ensure there's no data leakage in the case of short
3073  * writes.
3074  *
3075  * Caller must ensure we don't call this with other virtqueue
3076  * operations at the same time (except where noted).
3077  *
3078  * Returns NULL if there are no used buffers, or the "data" token
3079  * handed to virtqueue_add_*().
3080  */
3081 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
3082 			    void **ctx)
3083 {
3084 	struct vring_virtqueue *vq = to_vvq(_vq);
3085 
3086 	return VIRTQUEUE_CALL(vq, get, len, ctx);
3087 }
3088 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
3089 
3090 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
3091 {
3092 	return virtqueue_get_buf_ctx(_vq, len, NULL);
3093 }
3094 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
3095 /**
3096  * virtqueue_disable_cb - disable callbacks
3097  * @_vq: the struct virtqueue we're talking about.
3098  *
3099  * Note that this is not necessarily synchronous, hence unreliable and only
3100  * useful as an optimization.
3101  *
3102  * Unlike other operations, this need not be serialized.
3103  */
3104 void virtqueue_disable_cb(struct virtqueue *_vq)
3105 {
3106 	struct vring_virtqueue *vq = to_vvq(_vq);
3107 
3108 	VOID_VIRTQUEUE_CALL(vq, disable_cb);
3109 }
3110 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
3111 
3112 /**
3113  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
3114  * @_vq: the struct virtqueue we're talking about.
3115  *
3116  * This re-enables callbacks; it returns current queue state
3117  * in an opaque unsigned value. This value should be later tested by
3118  * virtqueue_poll, to detect a possible race between the driver checking for
3119  * more work, and enabling callbacks.
3120  *
3121  * Caller must ensure we don't call this with other virtqueue
3122  * operations at the same time (except where noted).
3123  */
3124 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
3125 {
3126 	struct vring_virtqueue *vq = to_vvq(_vq);
3127 
3128 	if (vq->event_triggered)
3129 		vq->event_triggered = false;
3130 
3131 	return VIRTQUEUE_CALL(vq, enable_cb_prepare);
3132 }
3133 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
3134 
3135 /**
3136  * virtqueue_poll - query pending used buffers
3137  * @_vq: the struct virtqueue we're talking about.
3138  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
3139  *
3140  * Returns "true" if there are pending used buffers in the queue.
3141  *
3142  * This does not need to be serialized.
3143  */
3144 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
3145 {
3146 	struct vring_virtqueue *vq = to_vvq(_vq);
3147 
3148 	if (unlikely(vq->broken))
3149 		return false;
3150 
3151 	virtio_mb(vq->weak_barriers);
3152 
3153 	return VIRTQUEUE_CALL(vq, poll, last_used_idx);
3154 }
3155 EXPORT_SYMBOL_GPL(virtqueue_poll);
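/*
 * virtqueue_enable_cb_prepare() and virtqueue_poll() let the re-enable and
 * the race check be separated, e.g. for poll-mode processing (sketch;
 * process_buf() stands in for driver code and is not part of this file):
 *
 *	unsigned int len, opaque;
 *	void *buf;
 *
 *	opaque = virtqueue_enable_cb_prepare(vq);
 *	if (virtqueue_poll(vq, opaque)) {
 *		virtqueue_disable_cb(vq);
 *		while ((buf = virtqueue_get_buf(vq, &len)))
 *			process_buf(buf, len);
 *	}
 */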
3156 
3157 /**
3158  * virtqueue_enable_cb - restart callbacks after disable_cb.
3159  * @_vq: the struct virtqueue we're talking about.
3160  *
3161  * This re-enables callbacks; it returns "false" if there are pending
3162  * buffers in the queue, to detect a possible race between the driver
3163  * checking for more work, and enabling callbacks.
3164  *
3165  * Caller must ensure we don't call this with other virtqueue
3166  * operations at the same time (except where noted).
3167  */
3168 bool virtqueue_enable_cb(struct virtqueue *_vq)
3169 {
3170 	unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
3171 
3172 	return !virtqueue_poll(_vq, last_used_idx);
3173 }
3174 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
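/*
 * A common callback pattern built on the helpers above (sketch; handle()
 * stands in for driver processing and is not part of this file):
 *
 *	static void my_vq_cb(struct virtqueue *vq)
 *	{
 *		unsigned int len;
 *		void *buf;
 *
 *		do {
 *			virtqueue_disable_cb(vq);
 *			while ((buf = virtqueue_get_buf(vq, &len)))
 *				handle(buf, len);
 *		} while (!virtqueue_enable_cb(vq));
 *	}
 *
 * The final virtqueue_enable_cb() returns false when buffers were used
 * while callbacks were disabled, so the loop drains them before waiting
 * for the next callback.
 */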
3175 
3176 /**
3177  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
3178  * @_vq: the struct virtqueue we're talking about.
3179  *
3180  * This re-enables callbacks but hints to the other side to delay
3181  * interrupts until most of the available buffers have been processed;
3182  * it returns "false" if there are many pending buffers in the queue,
3183  * to detect a possible race between the driver checking for more work,
3184  * and enabling callbacks.
3185  *
3186  * Caller must ensure we don't call this with other virtqueue
3187  * operations at the same time (except where noted).
3188  */
3189 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
3190 {
3191 	struct vring_virtqueue *vq = to_vvq(_vq);
3192 
3193 	if (vq->event_triggered)
3194 		data_race(vq->event_triggered = false);
3195 
3196 	return VIRTQUEUE_CALL(vq, enable_cb_delayed);
3197 }
3198 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
3199 
3200 /**
3201  * virtqueue_detach_unused_buf - detach first unused buffer
3202  * @_vq: the struct virtqueue we're talking about.
3203  *
3204  * Returns NULL or the "data" token handed to virtqueue_add_*().
3205  * This is not valid on an active queue; it is useful for device
3206  * shutdown or when resetting the queue.
3207  */
3208 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
3209 {
3210 	struct vring_virtqueue *vq = to_vvq(_vq);
3211 
3212 	return VIRTQUEUE_CALL(vq, detach_unused_buf);
3213 }
3214 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
3215 
3216 static inline bool more_used(const struct vring_virtqueue *vq)
3217 {
3218 	return VIRTQUEUE_CALL(vq, more_used);
3219 }
3220 
3221 /**
3222  * vring_interrupt - notify a virtqueue on an interrupt
3223  * @irq: the IRQ number (ignored)
3224  * @_vq: the struct virtqueue to notify
3225  *
3226  * Calls the callback function of @_vq to process the virtqueue
3227  * notification.
3228  */
3229 irqreturn_t vring_interrupt(int irq, void *_vq)
3230 {
3231 	struct vring_virtqueue *vq = to_vvq(_vq);
3232 
3233 	if (!more_used(vq)) {
3234 		pr_debug("virtqueue interrupt with no work for %p\n", vq);
3235 		return IRQ_NONE;
3236 	}
3237 
3238 	if (unlikely(vq->broken)) {
3239 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
3240 		dev_warn_once(&vq->vq.vdev->dev,
3241 			      "virtio vring IRQ raised before DRIVER_OK");
3242 		return IRQ_NONE;
3243 #else
3244 		return IRQ_HANDLED;
3245 #endif
3246 	}
3247 
3248 	/* Just a hint for performance: so it's ok that this can be racy! */
3249 	if (vq->event)
3250 		data_race(vq->event_triggered = true);
3251 
3252 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
3253 	if (vq->vq.callback)
3254 		vq->vq.callback(&vq->vq);
3255 
3256 	return IRQ_HANDLED;
3257 }
3258 EXPORT_SYMBOL_GPL(vring_interrupt);
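/*
 * Transports wire this up as the hard IRQ handler for a queue (sketch;
 * irq and the queue's struct virtqueue are assumed to come from the
 * transport):
 *
 *	err = request_irq(irq, vring_interrupt, IRQF_SHARED,
 *			  dev_name(&vdev->dev), vq);
 *
 * vring_interrupt() returns IRQ_NONE when the ring has no used buffers,
 * so sharing the line with other devices behaves as expected.
 */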
3259 
3260 struct virtqueue *vring_create_virtqueue(
3261 	unsigned int index,
3262 	unsigned int num,
3263 	unsigned int vring_align,
3264 	struct virtio_device *vdev,
3265 	bool weak_barriers,
3266 	bool may_reduce_num,
3267 	bool context,
3268 	bool (*notify)(struct virtqueue *),
3269 	void (*callback)(struct virtqueue *),
3270 	const char *name)
3271 {
3272 	union virtio_map map = {.dma_dev = vdev->dev.parent};
3273 
3274 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
3275 		return vring_create_virtqueue_packed(index, num, vring_align,
3276 				vdev, weak_barriers, may_reduce_num,
3277 				context, notify, callback, name, map);
3278 
3279 	return vring_create_virtqueue_split(index, num, vring_align,
3280 			vdev, weak_barriers, may_reduce_num,
3281 			context, notify, callback, name, map);
3282 }
3283 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
3284 
3285 struct virtqueue *vring_create_virtqueue_map(
3286 	unsigned int index,
3287 	unsigned int num,
3288 	unsigned int vring_align,
3289 	struct virtio_device *vdev,
3290 	bool weak_barriers,
3291 	bool may_reduce_num,
3292 	bool context,
3293 	bool (*notify)(struct virtqueue *),
3294 	void (*callback)(struct virtqueue *),
3295 	const char *name,
3296 	union virtio_map map)
3297 {
3298 
3299 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
3300 		return vring_create_virtqueue_packed(index, num, vring_align,
3301 				vdev, weak_barriers, may_reduce_num,
3302 				context, notify, callback, name, map);
3303 
3304 	return vring_create_virtqueue_split(index, num, vring_align,
3305 			vdev, weak_barriers, may_reduce_num,
3306 			context, notify, callback, name, map);
3307 }
3308 EXPORT_SYMBOL_GPL(vring_create_virtqueue_map);
3309 
3310 /**
3311  * virtqueue_resize - resize the vring of vq
3312  * @_vq: the struct virtqueue we're talking about.
3313  * @num: new ring num
3314  * @recycle: callback to recycle unused buffers
3315  * @recycle_done: callback to be invoked when recycle for all unused buffers done
3316  *
3317  * When a new vring is actually needed, the current vq is put into the reset
3318  * state and the passed @recycle callback is invoked for each buffer that is
3319  * no longer used. The old vring is released only after the new vring has
3320  * been created successfully.
3321  *
3322  * Caller must ensure we don't call this with other virtqueue operations
3323  * at the same time (except where noted).
3324  *
3325  * Returns zero or a negative error.
3326  * 0: success.
3327  * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
3328  *  vq can still work normally
3329  * -EBUSY: Failed to sync with device, vq may not work properly
3330  * -ENOENT: Transport or device not supported
3331  * -E2BIG/-EINVAL: num error
3332  * -EPERM: Operation not permitted
3333  *
3334  */
3335 int virtqueue_resize(struct virtqueue *_vq, u32 num,
3336 		     void (*recycle)(struct virtqueue *vq, void *buf),
3337 		     void (*recycle_done)(struct virtqueue *vq))
3338 {
3339 	struct vring_virtqueue *vq = to_vvq(_vq);
3340 	int err, err_reset;
3341 
3342 	if (num > vq->vq.num_max)
3343 		return -E2BIG;
3344 
3345 	if (!num)
3346 		return -EINVAL;
3347 
3348 	if (virtqueue_get_vring_size(_vq) == num)
3349 		return 0;
3350 
3351 	err = virtqueue_disable_and_recycle(_vq, recycle);
3352 	if (err)
3353 		return err;
3354 	if (recycle_done)
3355 		recycle_done(_vq);
3356 
3357 	err = VIRTQUEUE_CALL(vq, resize, num);
3358 
3359 	err_reset = virtqueue_enable_after_reset(_vq);
3360 	if (err_reset)
3361 		return err_reset;
3362 
3363 	return err;
3364 }
3365 EXPORT_SYMBOL_GPL(virtqueue_resize);
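/*
 * Example of the recycle contract (sketch; my_free_buf(), new_num and the
 * vq pointer are assumptions made for illustration):
 *
 *	static void my_recycle(struct virtqueue *vq, void *buf)
 *	{
 *		my_free_buf(buf);
 *	}
 *
 *	int err = virtqueue_resize(vq, new_num, my_recycle, NULL);
 *
 * Every still-unused buffer is handed back through my_recycle() before the
 * ring is rebuilt; on -ENOMEM the old ring is kept and remains usable.
 */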
3366 
3367 /**
3368  * virtqueue_reset - detach and recycle all unused buffers
3369  * @_vq: the struct virtqueue we're talking about.
3370  * @recycle: callback to recycle unused buffers
3371  * @recycle_done: callback invoked once all unused buffers have been recycled
3372  *
3373  * Caller must ensure we don't call this with other virtqueue operations
3374  * at the same time (except where noted).
3375  *
3376  * Returns zero or a negative error.
3377  * 0: success.
3378  * -EBUSY: Failed to sync with device, vq may not work properly
3379  * -ENOENT: Transport or device not supported
3380  * -EPERM: Operation not permitted
3381  */
3382 int virtqueue_reset(struct virtqueue *_vq,
3383 		    void (*recycle)(struct virtqueue *vq, void *buf),
3384 		    void (*recycle_done)(struct virtqueue *vq))
3385 {
3386 	struct vring_virtqueue *vq = to_vvq(_vq);
3387 	int err;
3388 
3389 	err = virtqueue_disable_and_recycle(_vq, recycle);
3390 	if (err)
3391 		return err;
3392 	if (recycle_done)
3393 		recycle_done(_vq);
3394 
3395 	VOID_VIRTQUEUE_CALL(vq, reset);
3396 
3397 	return virtqueue_enable_after_reset(_vq);
3398 }
3399 EXPORT_SYMBOL_GPL(virtqueue_reset);
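
/*
 * Editorial sketch (not part of virtio_ring.c): dropping every unused buffer
 * with virtqueue_reset(), reusing the hypothetical example_recycle() helper
 * from the sketch above.
 */
static int example_drop_all_pending(struct virtqueue *vq)
{
	return virtqueue_reset(vq, example_recycle, NULL);
}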
3400 
3401 struct virtqueue *vring_new_virtqueue(unsigned int index,
3402 				      unsigned int num,
3403 				      unsigned int vring_align,
3404 				      struct virtio_device *vdev,
3405 				      bool weak_barriers,
3406 				      bool context,
3407 				      void *pages,
3408 				      bool (*notify)(struct virtqueue *vq),
3409 				      void (*callback)(struct virtqueue *vq),
3410 				      const char *name)
3411 {
3412 	struct vring_virtqueue_split vring_split = {};
3413 	union virtio_map map = {.dma_dev = vdev->dev.parent};
3414 
3415 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3416 		struct vring_virtqueue_packed vring_packed = {};
3417 
3418 		vring_packed.vring.num = num;
3419 		vring_packed.vring.desc = pages;
3420 		return __vring_new_virtqueue_packed(index, &vring_packed,
3421 						    vdev, weak_barriers,
3422 						    context, notify, callback,
3423 						    name, map);
3424 	}
3425 
3426 	vring_init(&vring_split.vring, num, pages, vring_align);
3427 	return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
3428 				     context, notify, callback, name,
3429 				     map);
3430 }
3431 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
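
/*
 * Editorial sketch (not part of virtio_ring.c): the legacy path where the
 * caller allocates the ring memory itself and hands it to
 * vring_new_virtqueue(). Sizing via vring_size() assumes the split layout;
 * the notify/callback helpers are the hypothetical ones from the sketches
 * above.
 */
static struct virtqueue *example_new_vq_legacy(struct virtio_device *vdev,
					       unsigned int num)
{
	void *queue;

	queue = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vring_size(num, PAGE_SIZE)));
	if (!queue)
		return NULL;

	return vring_new_virtqueue(0, num, PAGE_SIZE, vdev,
				   true /* weak_barriers */, false /* context */,
				   queue, example_notify, example_callback,
				   "example-legacy-vq");
}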
3432 
3433 static void vring_free(struct virtqueue *_vq)
3434 {
3435 	struct vring_virtqueue *vq = to_vvq(_vq);
3436 
3437 	if (vq->we_own_ring) {
3438 		if (virtqueue_is_packed(vq)) {
3439 			vring_free_queue(vq->vq.vdev,
3440 					 vq->packed.ring_size_in_bytes,
3441 					 vq->packed.vring.desc,
3442 					 vq->packed.ring_dma_addr,
3443 					 vq->map);
3444 
3445 			vring_free_queue(vq->vq.vdev,
3446 					 vq->packed.event_size_in_bytes,
3447 					 vq->packed.vring.driver,
3448 					 vq->packed.driver_event_dma_addr,
3449 					 vq->map);
3450 
3451 			vring_free_queue(vq->vq.vdev,
3452 					 vq->packed.event_size_in_bytes,
3453 					 vq->packed.vring.device,
3454 					 vq->packed.device_event_dma_addr,
3455 					 vq->map);
3456 
3457 			kfree(vq->packed.desc_state);
3458 			kfree(vq->packed.desc_extra);
3459 		} else {
3460 			vring_free_queue(vq->vq.vdev,
3461 					 vq->split.queue_size_in_bytes,
3462 					 vq->split.vring.desc,
3463 					 vq->split.queue_dma_addr,
3464 					 vq->map);
3465 		}
3466 	}
3467 	if (!virtqueue_is_packed(vq)) {
3468 		kfree(vq->split.desc_state);
3469 		kfree(vq->split.desc_extra);
3470 	}
3471 }
3472 
3473 void vring_del_virtqueue(struct virtqueue *_vq)
3474 {
3475 	struct vring_virtqueue *vq = to_vvq(_vq);
3476 
3477 	spin_lock(&vq->vq.vdev->vqs_list_lock);
3478 	list_del(&_vq->list);
3479 	spin_unlock(&vq->vq.vdev->vqs_list_lock);
3480 
3481 	vring_free(_vq);
3482 
3483 	kfree(vq);
3484 }
3485 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
3486 
3487 u32 vring_notification_data(struct virtqueue *_vq)
3488 {
3489 	struct vring_virtqueue *vq = to_vvq(_vq);
3490 	u16 next;
3491 
3492 	if (virtqueue_is_packed(vq))
3493 		next = (vq->packed.next_avail_idx &
3494 				~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
3495 			vq->packed.avail_wrap_counter <<
3496 				VRING_PACKED_EVENT_F_WRAP_CTR;
3497 	else
3498 		next = vq->split.avail_idx_shadow;
3499 
3500 	return next << 16 | _vq->index;
3501 }
3502 EXPORT_SYMBOL_GPL(vring_notification_data);
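
/*
 * Editorial sketch (not part of virtio_ring.c): a hypothetical transport
 * notify callback that forwards the value built by vring_notification_data()
 * to the device when VIRTIO_F_NOTIFICATION_DATA was negotiated. The
 * example_transport structure and its doorbell register are assumptions
 * (and a real build would need <linux/io.h> for iowrite16/iowrite32).
 */
struct example_transport {
	void __iomem *notify_base;	/* assumed doorbell register */
};

static bool example_notify_with_data(struct virtqueue *vq)
{
	struct example_transport *tp = vq->vdev->priv;

	if (virtio_has_feature(vq->vdev, VIRTIO_F_NOTIFICATION_DATA))
		iowrite32(vring_notification_data(vq), tp->notify_base);
	else
		iowrite16(vq->index, tp->notify_base);
	return true;
}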
3503 
3504 /* Manipulates transport-specific feature bits. */
3505 void vring_transport_features(struct virtio_device *vdev)
3506 {
3507 	unsigned int i;
3508 
3509 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
3510 		switch (i) {
3511 		case VIRTIO_RING_F_INDIRECT_DESC:
3512 			break;
3513 		case VIRTIO_RING_F_EVENT_IDX:
3514 			break;
3515 		case VIRTIO_F_VERSION_1:
3516 			break;
3517 		case VIRTIO_F_ACCESS_PLATFORM:
3518 			break;
3519 		case VIRTIO_F_RING_PACKED:
3520 			break;
3521 		case VIRTIO_F_ORDER_PLATFORM:
3522 			break;
3523 		case VIRTIO_F_NOTIFICATION_DATA:
3524 			break;
3525 		case VIRTIO_F_IN_ORDER:
3526 			break;
3527 		default:
3528 			/* We don't understand this bit. */
3529 			__virtio_clear_bit(vdev, i);
3530 		}
3531 	}
3532 }
3533 EXPORT_SYMBOL_GPL(vring_transport_features);
3534 
3535 /**
3536  * virtqueue_get_vring_size - return the size of the virtqueue's vring
3537  * @_vq: the struct virtqueue containing the vring of interest.
3538  *
3539  * Returns the size of the vring.  This is mainly used for boasting to
3540  * userspace.  Unlike other operations, this need not be serialized.
3541  */
3542 unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
3543 {
3544 
3545 	const struct vring_virtqueue *vq = to_vvq(_vq);
3546 
3547 	return virtqueue_is_packed(vq) ? vq->packed.vring.num :
3548 				      vq->split.vring.num;
3549 }
3550 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
3551 
3552 /*
3553  * This function should only be called by the core, not directly by the driver.
3554  */
3555 void __virtqueue_break(struct virtqueue *_vq)
3556 {
3557 	struct vring_virtqueue *vq = to_vvq(_vq);
3558 
3559 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3560 	WRITE_ONCE(vq->broken, true);
3561 }
3562 EXPORT_SYMBOL_GPL(__virtqueue_break);
3563 
3564 /*
3565  * This function should only be called by the core, not directly by the driver.
3566  */
3567 void __virtqueue_unbreak(struct virtqueue *_vq)
3568 {
3569 	struct vring_virtqueue *vq = to_vvq(_vq);
3570 
3571 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3572 	WRITE_ONCE(vq->broken, false);
3573 }
3574 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3575 
3576 bool virtqueue_is_broken(const struct virtqueue *_vq)
3577 {
3578 	const struct vring_virtqueue *vq = to_vvq(_vq);
3579 
3580 	return READ_ONCE(vq->broken);
3581 }
3582 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3583 
3584 /*
3585  * This should prevent the device from being used, allowing drivers to
3586  * recover.  You may need to grab appropriate locks to flush.
3587  */
3588 void virtio_break_device(struct virtio_device *dev)
3589 {
3590 	struct virtqueue *_vq;
3591 
3592 	spin_lock(&dev->vqs_list_lock);
3593 	list_for_each_entry(_vq, &dev->vqs, list) {
3594 		struct vring_virtqueue *vq = to_vvq(_vq);
3595 
3596 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3597 		WRITE_ONCE(vq->broken, true);
3598 	}
3599 	spin_unlock(&dev->vqs_list_lock);
3600 }
3601 EXPORT_SYMBOL_GPL(virtio_break_device);
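
/*
 * Editorial sketch (not part of virtio_ring.c): a hypothetical driver error
 * path that marks the whole device broken after a fatal transport error so
 * that later virtqueue operations fail fast instead of touching a dead ring.
 */
static void example_handle_fatal_error(struct virtio_device *vdev)
{
	dev_err(&vdev->dev, "fatal transport error, breaking device\n");
	virtio_break_device(vdev);
	/* A reset or remove can now be scheduled from process context. */
}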
3602 
3603 /*
3604  * This should allow the device to be used by the driver. You may
3605  * need to grab appropriate locks to flush the write to
3606  * vq->broken. This should only be used in specific cases, e.g.
3607  * probing and restoring. This function should only be called by the
3608  * core, not directly by the driver.
3609  */
3610 void __virtio_unbreak_device(struct virtio_device *dev)
3611 {
3612 	struct virtqueue *_vq;
3613 
3614 	spin_lock(&dev->vqs_list_lock);
3615 	list_for_each_entry(_vq, &dev->vqs, list) {
3616 		struct vring_virtqueue *vq = to_vvq(_vq);
3617 
3618 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3619 		WRITE_ONCE(vq->broken, false);
3620 	}
3621 	spin_unlock(&dev->vqs_list_lock);
3622 }
3623 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3624 
3625 dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3626 {
3627 	const struct vring_virtqueue *vq = to_vvq(_vq);
3628 
3629 	BUG_ON(!vq->we_own_ring);
3630 
3631 	if (virtqueue_is_packed(vq))
3632 		return vq->packed.ring_dma_addr;
3633 
3634 	return vq->split.queue_dma_addr;
3635 }
3636 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3637 
3638 dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3639 {
3640 	const struct vring_virtqueue *vq = to_vvq(_vq);
3641 
3642 	BUG_ON(!vq->we_own_ring);
3643 
3644 	if (virtqueue_is_packed(vq))
3645 		return vq->packed.driver_event_dma_addr;
3646 
3647 	return vq->split.queue_dma_addr +
3648 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3649 }
3650 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3651 
3652 dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3653 {
3654 	const struct vring_virtqueue *vq = to_vvq(_vq);
3655 
3656 	BUG_ON(!vq->we_own_ring);
3657 
3658 	if (virtqueue_is_packed(vq))
3659 		return vq->packed.device_event_dma_addr;
3660 
3661 	return vq->split.queue_dma_addr +
3662 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3663 }
3664 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
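
/*
 * Editorial sketch (not part of virtio_ring.c): programming a device with the
 * three ring addresses exposed by the getters above. The write_reg callback
 * and its selector values (0/1/2) stand in for a transport's real
 * queue-address registers and are assumptions.
 */
static void example_program_queue(struct virtqueue *vq,
				  void (*write_reg)(u16 sel, u64 addr))
{
	write_reg(0, virtqueue_get_desc_addr(vq));	/* descriptor area */
	write_reg(1, virtqueue_get_avail_addr(vq));	/* driver (avail) area */
	write_reg(2, virtqueue_get_used_addr(vq));	/* device (used) area */
}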
3665 
3666 /* Only available for split ring */
3667 const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
3668 {
3669 	return &to_vvq(vq)->split.vring;
3670 }
3671 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3672 
3673 /**
3674  * virtqueue_map_alloc_coherent - alloc coherent mapping
3675  * @vdev: the virtio device we are talking to
3676  * @map: metadata for performing mapping
3677  * @size: the size of the buffer
3678  * @map_handle: the pointer to the mapped address
3679  * @gfp: allocation flag (GFP_XXX)
3680  *
3681  * Returns the virtual address, or NULL on error.
3682  */
3683 void *virtqueue_map_alloc_coherent(struct virtio_device *vdev,
3684 				   union virtio_map map,
3685 				   size_t size, dma_addr_t *map_handle,
3686 				   gfp_t gfp)
3687 {
3688 	if (vdev->map)
3689 		return vdev->map->alloc(map, size,
3690 					map_handle, gfp);
3691 	else
3692 		return dma_alloc_coherent(map.dma_dev, size,
3693 					  map_handle, gfp);
3694 }
3695 EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent);
3696 
3697 /**
3698  * virtqueue_map_free_coherent - free coherent mapping
3699  * @vdev: the virtio device we are talking to
3700  * @map: metadata for performing mapping
3701  * @size: the size of the buffer
3702  * @vaddr: the virtual address that needs to be freed
3703  * @map_handle: the mapped address that needs to be freed
3704  *
3705  */
3706 void virtqueue_map_free_coherent(struct virtio_device *vdev,
3707 				 union virtio_map map, size_t size, void *vaddr,
3708 				 dma_addr_t map_handle)
3709 {
3710 	if (vdev->map)
3711 		vdev->map->free(map, size, vaddr,
3712 				map_handle, 0);
3713 	else
3714 		dma_free_coherent(map.dma_dev, size, vaddr, map_handle);
3715 }
3716 EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent);
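
/*
 * Editorial sketch (not part of virtio_ring.c): pairing the two coherent
 * helpers above for a per-device scratch buffer. The PAGE_SIZE length is
 * arbitrary, and building the map token from the parent device assumes the
 * plain DMA path (no custom vdev->map ops).
 */
static void *example_alloc_scratch(struct virtio_device *vdev,
				   dma_addr_t *handle)
{
	union virtio_map map = { .dma_dev = vdev->dev.parent };

	return virtqueue_map_alloc_coherent(vdev, map, PAGE_SIZE, handle,
					    GFP_KERNEL);
}

static void example_free_scratch(struct virtio_device *vdev, void *vaddr,
				 dma_addr_t handle)
{
	union virtio_map map = { .dma_dev = vdev->dev.parent };

	virtqueue_map_free_coherent(vdev, map, PAGE_SIZE, vaddr, handle);
}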
3717 
3718 /**
3719  * virtqueue_map_page_attrs - map a page to the device
3720  * @_vq: the virtqueue we are talking to
3721  * @page: the page that will be mapped by the device
3722  * @offset: the offset in the page for a buffer
3723  * @size: the buffer size
3724  * @dir: mapping direction
3725  * @attrs: mapping attributes
3726  *
3727  * Returns the mapped address; the caller should check it with virtqueue_map_mapping_error().
3728  */
3729 dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq,
3730 				    struct page *page,
3731 				    unsigned long offset,
3732 				    size_t size,
3733 				    enum dma_data_direction dir,
3734 				    unsigned long attrs)
3735 {
3736 	const struct vring_virtqueue *vq = to_vvq(_vq);
3737 	struct virtio_device *vdev = _vq->vdev;
3738 
3739 	if (vdev->map)
3740 		return vdev->map->map_page(vq->map,
3741 					   page, offset, size,
3742 					   dir, attrs);
3743 
3744 	return dma_map_page_attrs(vring_dma_dev(vq),
3745 				  page, offset, size,
3746 				  dir, attrs);
3747 }
3748 EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs);
3749 
3750 /**
3751  * virtqueue_unmap_page_attrs - unmap a page previously mapped for the device
3752  * @_vq: the virtqueue we are talking to
3753  * @map_handle: the mapped address
3754  * @size: the buffer size
3755  * @dir: mapping direction
3756  * @attrs: unmapping attributes
3757  */
3758 void virtqueue_unmap_page_attrs(const struct virtqueue *_vq,
3759 				dma_addr_t map_handle,
3760 				size_t size, enum dma_data_direction dir,
3761 				unsigned long attrs)
3762 {
3763 	const struct vring_virtqueue *vq = to_vvq(_vq);
3764 	struct virtio_device *vdev = _vq->vdev;
3765 
3766 	if (vdev->map)
3767 		vdev->map->unmap_page(vq->map,
3768 				      map_handle, size, dir, attrs);
3769 	else
3770 		dma_unmap_page_attrs(vring_dma_dev(vq), map_handle,
3771 				     size, dir, attrs);
3772 }
3773 EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs);
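
/*
 * Editorial sketch (not part of virtio_ring.c): mapping one full page for the
 * device and unmapping it again. The DMA_TO_DEVICE direction and zero attrs
 * are illustrative choices.
 */
static dma_addr_t example_map_one_page(struct virtqueue *vq, struct page *page)
{
	dma_addr_t addr;

	addr = virtqueue_map_page_attrs(vq, page, 0, PAGE_SIZE,
					DMA_TO_DEVICE, 0);
	if (virtqueue_map_mapping_error(vq, addr))
		return DMA_MAPPING_ERROR;
	return addr;
}

static void example_unmap_one_page(struct virtqueue *vq, dma_addr_t addr)
{
	virtqueue_unmap_page_attrs(vq, addr, PAGE_SIZE, DMA_TO_DEVICE, 0);
}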
3774 
3775 /**
3776  * virtqueue_map_single_attrs - map DMA for _vq
3777  * @_vq: the struct virtqueue we're talking about.
3778  * @ptr: the pointer to the buffer to map
3779  * @size: the size of the buffer to map
3780  * @dir: DMA direction
3781  * @attrs: DMA Attrs
3782  *
3783  * The caller uses this to do the DMA mapping in advance; the resulting DMA
3784  * address can then be passed to this _vq when it is in premapped mode.
3785  *
3786  * Returns the mapped address; the caller should check it with virtqueue_map_mapping_error().
3787  */
3788 dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr,
3789 				      size_t size,
3790 				      enum dma_data_direction dir,
3791 				      unsigned long attrs)
3792 {
3793 	const struct vring_virtqueue *vq = to_vvq(_vq);
3794 
3795 	if (!vq->use_map_api) {
3796 		kmsan_handle_dma(virt_to_phys(ptr), size, dir);
3797 		return (dma_addr_t)virt_to_phys(ptr);
3798 	}
3799 
3800 	/* DMA must never operate on areas that might be remapped. */
3801 	if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr),
3802 			  "rejecting DMA map of vmalloc memory\n"))
3803 		return DMA_MAPPING_ERROR;
3804 
3805 	return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr),
3806 					offset_in_page(ptr), size, dir, attrs);
3807 }
3808 EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs);
3809 
3810 /**
3811  * virtqueue_unmap_single_attrs - unmap a DMA mapping for _vq
3812  * @_vq: the struct virtqueue we're talking about.
3813  * @addr: the dma address to unmap
3814  * @size: the size of the buffer
3815  * @dir: DMA direction
3816  * @attrs: DMA Attrs
3817  *
3818  * Unmap an address that was mapped by the virtqueue_map_* APIs.
3819  *
3820  */
3821 void virtqueue_unmap_single_attrs(const struct virtqueue *_vq,
3822 				  dma_addr_t addr,
3823 				  size_t size, enum dma_data_direction dir,
3824 				  unsigned long attrs)
3825 {
3826 	const struct vring_virtqueue *vq = to_vvq(_vq);
3827 
3828 	if (!vq->use_map_api)
3829 		return;
3830 
3831 	virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs);
3832 }
3833 EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs);
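
/*
 * Editorial sketch (not part of virtio_ring.c): pre-mapping a driver buffer
 * before handing its DMA address to a virtqueue running in premapped mode,
 * and undoing that mapping later. Direction and attrs are illustrative.
 */
static dma_addr_t example_premap_buf(struct virtqueue *vq, void *buf,
				     size_t len)
{
	dma_addr_t addr;

	addr = virtqueue_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
	if (virtqueue_map_mapping_error(vq, addr))
		return DMA_MAPPING_ERROR;
	return addr;
}

static void example_unpremap_buf(struct virtqueue *vq, dma_addr_t addr,
				 size_t len)
{
	virtqueue_unmap_single_attrs(vq, addr, len, DMA_TO_DEVICE, 0);
}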
3834 
3835 /**
3836  * virtqueue_map_mapping_error - check dma address
3837  * @_vq: the struct virtqueue we're talking about.
3838  * @addr: DMA address
3839  *
3840  * Returns 0 if the DMA address is valid; any other value indicates a mapping error.
3841  */
3842 int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr)
3843 {
3844 	const struct vring_virtqueue *vq = to_vvq(_vq);
3845 
3846 	return vring_mapping_error(vq, addr);
3847 }
3848 EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error);
3849 
3850 /**
3851  * virtqueue_map_need_sync - check whether a DMA address needs to be synced
3852  * @_vq: the struct virtqueue we're talking about.
3853  * @addr: DMA address
3854  *
3855  * Check if the DMA address mapped by the virtqueue_map_* APIs needs to be
3856  * synchronized.
3857  *
3858  * Returns true if the address needs to be synced, false otherwise.
3859  */
3860 bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr)
3861 {
3862 	const struct vring_virtqueue *vq = to_vvq(_vq);
3863 	struct virtio_device *vdev = _vq->vdev;
3864 
3865 	if (!vq->use_map_api)
3866 		return false;
3867 
3868 	if (vdev->map)
3869 		return vdev->map->need_sync(vq->map, addr);
3870 	else
3871 		return dma_need_sync(vring_dma_dev(vq), addr);
3872 }
3873 EXPORT_SYMBOL_GPL(virtqueue_map_need_sync);
3874 
3875 /**
3876  * virtqueue_map_sync_single_range_for_cpu - sync a mapped range for the CPU
3877  * @_vq: the struct virtqueue we're talking about.
3878  * @addr: DMA address
3879  * @offset: DMA address offset
3880  * @size: buf size for sync
3881  * @dir: DMA direction
3882  *
3883  * Before calling this function, use virtqueue_map_need_sync() to confirm that
3884  * the DMA address really needs to be synchronized.
3885  *
3886  */
3887 void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq,
3888 					     dma_addr_t addr,
3889 					     unsigned long offset, size_t size,
3890 					     enum dma_data_direction dir)
3891 {
3892 	const struct vring_virtqueue *vq = to_vvq(_vq);
3893 	struct virtio_device *vdev = _vq->vdev;
3894 
3895 	if (!vq->use_map_api)
3896 		return;
3897 
3898 	if (vdev->map)
3899 		vdev->map->sync_single_for_cpu(vq->map,
3900 					       addr + offset, size, dir);
3901 	else
3902 		dma_sync_single_range_for_cpu(vring_dma_dev(vq),
3903 					      addr, offset, size, dir);
3904 }
3905 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu);
3906 
3907 /**
3908  * virtqueue_map_sync_single_range_for_device - sync a mapped range for the device
3909  * @_vq: the struct virtqueue we're talking about.
3910  * @addr: DMA address
3911  * @offset: DMA address offset
3912  * @size: buf size for sync
3913  * @dir: DMA direction
3914  *
3915  * Before calling this function, use virtqueue_map_need_sync() to confirm that
3916  * the DMA address really needs to be synchronized.
3917  */
3918 void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq,
3919 						dma_addr_t addr,
3920 						unsigned long offset, size_t size,
3921 						enum dma_data_direction dir)
3922 {
3923 	const struct vring_virtqueue *vq = to_vvq(_vq);
3924 	struct virtio_device *vdev = _vq->vdev;
3925 
3926 	if (!vq->use_map_api)
3927 		return;
3928 
3929 	if (vdev->map)
3930 		vdev->map->sync_single_for_device(vq->map,
3931 						  addr + offset,
3932 						  size, dir);
3933 	else
3934 		dma_sync_single_range_for_device(vring_dma_dev(vq), addr,
3935 						 offset, size, dir);
3936 }
3937 EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device);
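
/*
 * Editorial sketch (not part of virtio_ring.c): guarding the sync helpers
 * above with virtqueue_map_need_sync(), as the kernel-doc recommends. The
 * offsets and directions are illustrative.
 */
static void example_sync_for_cpu(struct virtqueue *vq, dma_addr_t addr,
				 size_t len)
{
	if (virtqueue_map_need_sync(vq, addr))
		virtqueue_map_sync_single_range_for_cpu(vq, addr, 0, len,
							DMA_FROM_DEVICE);
}

static void example_sync_for_device(struct virtqueue *vq, dma_addr_t addr,
				    size_t len)
{
	if (virtqueue_map_need_sync(vq, addr))
		virtqueue_map_sync_single_range_for_device(vq, addr, 0, len,
							   DMA_TO_DEVICE);
}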
3938 
3939 MODULE_DESCRIPTION("Virtio ring implementation");
3940 MODULE_LICENSE("GPL");
3941