xref: /linux/tools/virtio/ringtest/ring.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * Copyright (C) 2016 Red Hat, Inc.
3  * Author: Michael S. Tsirkin <mst@redhat.com>
4  * This work is licensed under the terms of the GNU GPL, version 2.
5  *
6  * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
7  * signalling, unconditionally.
8  */
9 #define _GNU_SOURCE
10 #include "main.h"
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 
/* Decide whether the peer has to be notified.
 * event - index after which the peer asked to be notified.
 * next  - index at which the next entry will be written.
 * prev  - value of "next" at the time of the previous notification.
 *
 * Returns true when event falls inside [prev, next). All arithmetic is
 * done modulo 2^16, so the test keeps working across index wrap-around.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	/* distance from the requested event index back to the newest entry */
	unsigned short behind_next = next - event - 1;
	/* number of entries written since the previous notification */
	unsigned short added = next - prev;

	return behind_next < added;
}
25 
/* Design:
 * The guest publishes a descriptor with a unique index value and with
 * DESC_HW set in flags.
 * The host hands a used descriptor back with the correct len and index
 * and with DESC_HW cleared.
 * In both directions flags is always the last field written.
 */
#define DESC_HW 0x1

/* One ring slot, shared between guest and host. */
struct desc {
	/* DESC_HW is set by add_inbuf() and cleared by use_buf() */
	unsigned short flags;
	/* selects the entry of the data[] array that belongs to this buffer */
	unsigned short index;
	/* buffer length */
	unsigned int len;
	/* buffer address, stored as an integer */
	unsigned long long addr;
};
39 
/* Amount of padding, in bytes, needed to avoid false cache sharing
 * between fields touched by the host and fields touched by the guest.
 */
#define HOST_GUEST_PADDING 0x80

/* Event indices. Mostly read; each index is padded out to its own
 * HOST_GUEST_PADDING-sized slot.
 */
struct event {
	/* written by enable_kick(), consulted by kick_available() */
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - sizeof(unsigned short)];
	/* written by enable_call(), consulted by call_used() */
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - sizeof(unsigned short)];
};
50 
/* Guest-side bookkeeping for one in-flight buffer. */
struct data {
	/* The descriptor is writeable, so the buffer pointer cannot be
	 * recovered from it; keep a private copy here.
	 */
	void *buf;
	/* caller-supplied pointer, handed back by get_buf() */
	void *data;
};

/* Array of ring_size entries allocated by alloc_ring(), indexed by the
 * descriptor's index field.
 */
struct data *data;
55 
/* Event index block, allocated and zeroed by alloc_ring(). */
struct event *event;
/* Descriptor array of ring_size entries, allocated by alloc_ring(). */
struct desc *ring;
58 
59 struct guest {
60 	unsigned avail_idx;
61 	unsigned last_used_idx;
62 	unsigned num_free;
63 	unsigned kicked_avail_idx;
64 	unsigned char reserved[HOST_GUEST_PADDING - 12];
65 } guest;
66 
67 struct host {
68 	/* we do not need to track last avail index
69 	 * unless we have more than one in flight.
70 	 */
71 	unsigned used_idx;
72 	unsigned called_used_idx;
73 	unsigned char reserved[HOST_GUEST_PADDING - 4];
74 } host;
75 
76 /* implemented by ring */
77 void alloc_ring(void)
78 {
79 	int ret;
80 	int i;
81 
82 	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
83 	if (ret) {
84 		perror("Unable to allocate ring buffer.\n");
85 		exit(3);
86 	}
87 	event = malloc(sizeof *event);
88 	if (!event) {
89 		perror("Unable to allocate event buffer.\n");
90 		exit(3);
91 	}
92 	memset(event, 0, sizeof *event);
93 	guest.avail_idx = 0;
94 	guest.kicked_avail_idx = -1;
95 	guest.last_used_idx = 0;
96 	host.used_idx = 0;
97 	host.called_used_idx = -1;
98 	for (i = 0; i < ring_size; ++i) {
99 		struct desc desc = {
100 			.index = i,
101 		};
102 		ring[i] = desc;
103 	}
104 	guest.num_free = ring_size;
105 	data = malloc(ring_size * sizeof *data);
106 	if (!data) {
107 		perror("Unable to allocate data buffer.\n");
108 		exit(3);
109 	}
110 	memset(data, 0, ring_size * sizeof *data);
111 }
112 
113 /* guest side */
114 int add_inbuf(unsigned len, void *buf, void *datap)
115 {
116 	unsigned head, index;
117 
118 	if (!guest.num_free)
119 		return -1;
120 
121 	guest.num_free--;
122 	head = (ring_size - 1) & (guest.avail_idx++);
123 
124 	/* Start with a write. On MESI architectures this helps
125 	 * avoid a shared state with consumer that is polling this descriptor.
126 	 */
127 	ring[head].addr = (unsigned long)(void*)buf;
128 	ring[head].len = len;
129 	/* read below might bypass write above. That is OK because it's just an
130 	 * optimization. If this happens, we will get the cache line in a
131 	 * shared state which is unfortunate, but probably not worth it to
132 	 * add an explicit full barrier to avoid this.
133 	 */
134 	barrier();
135 	index = ring[head].index;
136 	data[index].buf = buf;
137 	data[index].data = datap;
138 	/* Barrier A (for pairing) */
139 	smp_release();
140 	ring[head].flags = DESC_HW;
141 
142 	return 0;
143 }
144 
145 void *get_buf(unsigned *lenp, void **bufp)
146 {
147 	unsigned head = (ring_size - 1) & guest.last_used_idx;
148 	unsigned index;
149 	void *datap;
150 
151 	if (ring[head].flags & DESC_HW)
152 		return NULL;
153 	/* Barrier B (for pairing) */
154 	smp_acquire();
155 	*lenp = ring[head].len;
156 	index = ring[head].index & (ring_size - 1);
157 	datap = data[index].data;
158 	*bufp = data[index].buf;
159 	data[index].buf = NULL;
160 	data[index].data = NULL;
161 	guest.num_free++;
162 	guest.last_used_idx++;
163 	return datap;
164 }
165 
166 void poll_used(void)
167 {
168 	unsigned head = (ring_size - 1) & guest.last_used_idx;
169 
170 	while (ring[head].flags & DESC_HW)
171 		busy_wait();
172 }
173 
/* Guest: request that the host stop sending calls. Intentionally a no-op. */
void disable_call(void)
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}
180 
181 bool enable_call()
182 {
183 	unsigned head = (ring_size - 1) & guest.last_used_idx;
184 
185 	event->call_index = guest.last_used_idx;
186 	/* Flush call index write */
187 	/* Barrier D (for pairing) */
188 	smp_mb();
189 	return ring[head].flags & DESC_HW;
190 }
191 
192 void kick_available(void)
193 {
194 	/* Flush in previous flags write */
195 	/* Barrier C (for pairing) */
196 	smp_mb();
197 	if (!need_event(event->kick_index,
198 			guest.avail_idx,
199 			guest.kicked_avail_idx))
200 		return;
201 
202 	guest.kicked_avail_idx = guest.avail_idx;
203 	kick();
204 }
205 
206 /* host side */
/* Host: request that the guest stop sending kicks. Intentionally a no-op. */
void disable_kick(void)
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}
213 
214 bool enable_kick()
215 {
216 	unsigned head = (ring_size - 1) & host.used_idx;
217 
218 	event->kick_index = host.used_idx;
219 	/* Barrier C (for pairing) */
220 	smp_mb();
221 	return !(ring[head].flags & DESC_HW);
222 }
223 
224 void poll_avail(void)
225 {
226 	unsigned head = (ring_size - 1) & host.used_idx;
227 
228 	while (!(ring[head].flags & DESC_HW))
229 		busy_wait();
230 }
231 
232 bool use_buf(unsigned *lenp, void **bufp)
233 {
234 	unsigned head = (ring_size - 1) & host.used_idx;
235 
236 	if (!(ring[head].flags & DESC_HW))
237 		return false;
238 
239 	/* make sure length read below is not speculated */
240 	/* Barrier A (for pairing) */
241 	smp_acquire();
242 
243 	/* simple in-order completion: we don't need
244 	 * to touch index at all. This also means we
245 	 * can just modify the descriptor in-place.
246 	 */
247 	ring[head].len--;
248 	/* Make sure len is valid before flags.
249 	 * Note: alternative is to write len and flags in one access -
250 	 * possible on 64 bit architectures but wmb is free on Intel anyway
251 	 * so I have no way to test whether it's a gain.
252 	 */
253 	/* Barrier B (for pairing) */
254 	smp_release();
255 	ring[head].flags = 0;
256 	host.used_idx++;
257 	return true;
258 }
259 
260 void call_used(void)
261 {
262 	/* Flush in previous flags write */
263 	/* Barrier D (for pairing) */
264 	smp_mb();
265 	if (!need_event(event->call_index,
266 			host.used_idx,
267 			host.called_used_idx))
268 		return;
269 
270 	host.called_used_idx = host.used_idx;
271 	call();
272 }
273