xref: /linux/tools/hv/vmbus_bufring.c (revision 447e140e66fd226350b3ce86cffc965eaae4c856)
1 // SPDX-License-Identifier: BSD-3-Clause
2 /*
3  * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
4  * Copyright (c) 2012 NetApp Inc.
5  * Copyright (c) 2012 Citrix Inc.
6  * All rights reserved.
7  */
8 
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <emmintrin.h>
12 #include <linux/limits.h>
13 #include <stdbool.h>
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 #include <sys/uio.h>
19 #include <unistd.h>
20 #include "vmbus_bufring.h"
21 
/**
 * Compiler barrier.
 *
 * Guarantees that operation reordering does not occur at compile time
 * for operations directly before and after the barrier.
 *
 * Expands to an empty asm statement with a "memory" clobber, so it emits
 * no instruction of its own.
 */
#define	rte_compiler_barrier()		({ asm volatile ("" : : : "memory"); })

/* All-ones 64-bit constant; rte_vmbus_chan_send() stores it as the xactid. */
#define VMBUS_RQST_ERROR	0xFFFFFFFFFFFFFFFF
/*
 * Mask off the bits of val selected by (align - 1). For a power-of-two
 * align this rounds val DOWN to a multiple of align (it never rounds up).
 * The result has the type of val.
 */
#define ALIGN(val, align)	((typeof(val))((val) & (~((typeof(val))((align) - 1)))))
32 
/*
 * Map 2 * size bytes of the file referred to by *fd, starting at offset 0,
 * as a shared read/write mapping.
 *
 * @fd:   pointer to the open file descriptor to map
 * @size: half of the mapping length, in bytes; must be positive
 *
 * Returns the address of the mapping, or NULL on failure. For a NULL @fd or
 * a non-positive @size errno is set to EINVAL; otherwise errno is whatever
 * mmap() reported.
 */
void *vmbus_uio_map(int *fd, int size)
{
	size_t maplen;
	void *map;

	if (!fd || size <= 0) {
		errno = EINVAL;
		return NULL;
	}

	/* Widen before doubling: "2 * size" in int overflows above INT_MAX / 2 */
	maplen = (size_t)size * 2;

	map = mmap(NULL, maplen, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
	if (map == MAP_FAILED)
		return NULL;

	return map;
}
43 
/*
 * Advance a bufring index by inc and wrap it at sz.
 *
 * sz is subtracted at most once, so the result lies in [0, sz) only when
 * idx + inc < 2 * sz.
 */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	uint32_t next = idx + inc;

	return (next >= sz) ? next - sz : next;
}
53 
54 void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
55 {
56 	br->vbr = buf;
57 	br->windex = br->vbr->windex;
58 	br->dsize = blen - sizeof(struct vmbus_bufring);
59 }
60 
/*
 * Full memory barrier: loads and stores issued before the call are ordered
 * before loads and stores issued after it.
 *
 * Implemented with the compiler's sequentially consistent fence builtin
 * rather than x86-64 inline assembly, so it builds on any architecture the
 * compiler supports and does not depend on the libc-private
 * __always_inline macro.
 */
static inline void
rte_smp_mb(void)
{
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
}
66 
/*
 * Atomic 32-bit compare-and-set.
 *
 * If *dst equals exp, store src into *dst and return 1; otherwise leave
 * *dst untouched and return 0. The operation is sequentially consistent,
 * so it also acts as a full barrier for the surrounding accesses.
 *
 * Uses the compiler builtin instead of hand-written "lock cmpxchgl" so the
 * helper is not tied to x86.
 */
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	/* On failure the builtin overwrites our private copy of exp; ignored. */
	return __atomic_compare_exchange_n(dst, &exp, src, 0,
					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
83 
84 static inline uint32_t
85 vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
86 		  const void *src0, uint32_t cplen)
87 {
88 	uint8_t *br_data = tbr->vbr->data;
89 	uint32_t br_dsize = tbr->dsize;
90 	const uint8_t *src = src0;
91 
92 	/* XXX use double mapping like Linux kernel? */
93 	if (cplen > br_dsize - windex) {
94 		uint32_t fraglen = br_dsize - windex;
95 
96 		/* Wrap-around detected */
97 		memcpy(br_data + windex, src, fraglen);
98 		memcpy(br_data, src + fraglen, cplen - fraglen);
99 	} else {
100 		memcpy(br_data + windex, src, cplen);
101 	}
102 
103 	return vmbus_br_idxinc(windex, cplen, br_dsize);
104 }
105 
106 /*
107  * Write scattered channel packet to TX bufring.
108  *
109  * The offset of this channel packet is written as a 64bits value
110  * immediately after this channel packet.
111  *
112  * The write goes through three stages:
113  *  1. Reserve space in ring buffer for the new data.
114  *     Writer atomically moves priv_write_index.
115  *  2. Copy the new data into the ring.
116  *  3. Update the tail of the ring (visible to host) that indicates
117  *     next read location. Writer updates write_index
118  */
119 static int
120 vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
121 {
122 	struct vmbus_bufring *vbr = tbr->vbr;
123 	uint32_t ring_size = tbr->dsize;
124 	uint32_t old_windex, next_windex, windex, total;
125 	uint64_t save_windex;
126 	int i;
127 
128 	total = 0;
129 	for (i = 0; i < iovlen; i++)
130 		total += iov[i].iov_len;
131 	total += sizeof(save_windex);
132 
133 	/* Reserve space in ring */
134 	do {
135 		uint32_t avail;
136 
137 		/* Get current free location */
138 		old_windex = tbr->windex;
139 
140 		/* Prevent compiler reordering this with calculation */
141 		rte_compiler_barrier();
142 
143 		avail = vmbus_br_availwrite(tbr, old_windex);
144 
145 		/* If not enough space in ring, then tell caller. */
146 		if (avail <= total)
147 			return -EAGAIN;
148 
149 		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);
150 
151 		/* Atomic update of next write_index for other threads */
152 	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));
153 
154 	/* Space from old..new is now reserved */
155 	windex = old_windex;
156 	for (i = 0; i < iovlen; i++)
157 		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);
158 
159 	/* Set the offset of the current channel packet. */
160 	save_windex = ((uint64_t)old_windex) << 32;
161 	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
162 				   sizeof(save_windex));
163 
164 	/* The region reserved should match region used */
165 	if (windex != next_windex)
166 		return -EINVAL;
167 
168 	/* Ensure that data is available before updating host index */
169 	rte_compiler_barrier();
170 
171 	/* Checkin for our reservation. wait for our turn to update host */
172 	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
173 		_mm_pause();
174 
175 	return 0;
176 }
177 
178 int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
179 			uint32_t dlen, uint32_t flags)
180 {
181 	struct vmbus_chanpkt pkt;
182 	unsigned int pktlen, pad_pktlen;
183 	const uint32_t hlen = sizeof(pkt);
184 	uint64_t pad = 0;
185 	struct iovec iov[3];
186 	int error;
187 
188 	pktlen = hlen + dlen;
189 	pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));
190 
191 	pkt.hdr.type = type;
192 	pkt.hdr.flags = flags;
193 	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
194 	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
195 	pkt.hdr.xactid = VMBUS_RQST_ERROR;
196 
197 	iov[0].iov_base = &pkt;
198 	iov[0].iov_len = hlen;
199 	iov[1].iov_base = data;
200 	iov[1].iov_len = dlen;
201 	iov[2].iov_base = &pad;
202 	iov[2].iov_len = pad_pktlen - pktlen;
203 
204 	error = vmbus_txbr_write(txbr, iov, 3);
205 
206 	return error;
207 }
208 
209 static inline uint32_t
210 vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
211 		    void *dst0, size_t cplen)
212 {
213 	const uint8_t *br_data = rbr->vbr->data;
214 	uint32_t br_dsize = rbr->dsize;
215 	uint8_t *dst = dst0;
216 
217 	if (cplen > br_dsize - rindex) {
218 		uint32_t fraglen = br_dsize - rindex;
219 
220 		/* Wrap-around detected. */
221 		memcpy(dst, br_data + rindex, fraglen);
222 		memcpy(dst + fraglen, br_data, cplen - fraglen);
223 	} else {
224 		memcpy(dst, br_data + rindex, cplen);
225 	}
226 
227 	return vmbus_br_idxinc(rindex, cplen, br_dsize);
228 }
229 
230 /* Copy data from receive ring but don't change index */
231 static int
232 vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
233 {
234 	uint32_t avail;
235 
236 	/*
237 	 * The requested data and the 64bits channel packet
238 	 * offset should be there at least.
239 	 */
240 	avail = vmbus_br_availread(rbr);
241 	if (avail < dlen + sizeof(uint64_t))
242 		return -EAGAIN;
243 
244 	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
245 	return 0;
246 }
247 
248 /*
249  * Copy data from receive ring and change index
250  * NOTE:
251  * We assume (dlen + skip) == sizeof(channel packet).
252  */
253 static int
254 vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
255 {
256 	struct vmbus_bufring *vbr = rbr->vbr;
257 	uint32_t br_dsize = rbr->dsize;
258 	uint32_t rindex;
259 
260 	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
261 		return -EAGAIN;
262 
263 	/* Record where host was when we started read (for debug) */
264 	rbr->windex = rbr->vbr->windex;
265 
266 	/*
267 	 * Copy channel packet from RX bufring.
268 	 */
269 	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
270 	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
271 
272 	/*
273 	 * Discard this channel packet's 64bits offset, which is useless to us.
274 	 */
275 	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);
276 
277 	/* Update the read index _after_ the channel packet is fetched.	 */
278 	rte_compiler_barrier();
279 
280 	vbr->rindex = rindex;
281 
282 	return 0;
283 }
284 
285 int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
286 			    void *data, uint32_t *len)
287 {
288 	struct vmbus_chanpkt_hdr pkt;
289 	uint32_t dlen, bufferlen = *len;
290 	int error;
291 
292 	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
293 	if (error)
294 		return error;
295 
296 	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
297 		/* XXX this channel is dead actually. */
298 		return -EIO;
299 
300 	if (unlikely(pkt.hlen > pkt.tlen))
301 		return -EIO;
302 
303 	/* Length are in quad words */
304 	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
305 	*len = dlen;
306 
307 	/* If caller buffer is not large enough */
308 	if (unlikely(dlen > bufferlen))
309 		return -ENOBUFS;
310 
311 	/* Read data and skip packet header */
312 	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
313 	if (error)
314 		return error;
315 
316 	/* Return the number of bytes read */
317 	return dlen + sizeof(uint64_t);
318 }
319