// SPDX-License-Identifier: BSD-3-Clause
/*
 * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 */

#include <errno.h>
#include <fcntl.h>
#include <emmintrin.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <unistd.h>
#include "vmbus_bufring.h"

/**
 * Compiler barrier.
 *
 * Guarantees that operation reordering does not occur at compile time
 * for operations directly before and after the barrier.
 */
#define rte_compiler_barrier()	({ asm volatile ("" : : : "memory"); })

#define VMBUS_RQST_ERROR	0xFFFFFFFFFFFFFFFF

/* Round val up to the next multiple of align (align must be a power of two) */
#define ALIGN(val, align) \
	((typeof(val))(((val) + ((typeof(val))(align) - 1)) & \
		       ~((typeof(val))(align) - 1)))

void *vmbus_uio_map(int *fd, int size)
{
	void *map;

	map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
	if (map == MAP_FAILED)
		return NULL;

	return map;
}

/* Increase bufring index by inc with wraparound */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	idx += inc;
	if (idx >= sz)
		idx -= sz;

	return idx;
}

void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
{
	br->vbr = buf;
	br->windex = br->vbr->windex;
	br->dsize = blen - sizeof(struct vmbus_bufring);
}

static inline __always_inline void
rte_smp_mb(void)
{
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
}

static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile("lock ; "
		     "cmpxchgl %[src], %[dst];"
		     "sete %[res];"
		     : [res] "=a" (res),	/* output */
		       [dst] "=m" (*dst)
		     : [src] "r" (src),		/* input */
		       "a" (exp),
		       "m" (*dst)
		     : "memory");		/* clobber list */
	return res;
}

static inline uint32_t
vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
		  const void *src0, uint32_t cplen)
{
	uint8_t *br_data = tbr->vbr->data;
	uint32_t br_dsize = tbr->dsize;
	const uint8_t *src = src0;

	/* XXX use double mapping like Linux kernel? */
	if (cplen > br_dsize - windex) {
		uint32_t fraglen = br_dsize - windex;

		/* Wrap-around detected */
		memcpy(br_data + windex, src, fraglen);
		memcpy(br_data, src + fraglen, cplen - fraglen);
	} else {
		memcpy(br_data + windex, src, cplen);
	}

	return vmbus_br_idxinc(windex, cplen, br_dsize);
}
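
/*
 * Illustrative note (not part of the original file): a concrete example of
 * the wraparound copy above. With a data area of br_dsize = 4096 bytes, a
 * call with windex = 4090 and cplen = 16 copies the first 6 bytes to
 * offsets 4090..4095, the remaining 10 bytes to offsets 0..9, and returns
 * the new write offset 10.
 */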

/*
 * Write scattered channel packet to TX bufring.
 *
 * The offset of this channel packet is written as a 64-bit value
 * immediately after this channel packet.
 *
 * The write goes through three stages:
 *	1. Reserve space in the ring buffer for the new data.
 *	   The writer atomically moves the private write index (tbr->windex).
 *	2. Copy the new data into the ring.
 *	3. Update the host-visible write index (vbr->windex) so the host
 *	   knows where the readable data ends.
 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/* If not enough space in ring, then tell caller. */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved */
	windex = old_windex;
	for (i = 0; i < iovlen; i++)
		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The region reserved should match the region used */
	if (windex != next_windex)
		return -EINVAL;

	/* Ensure that data is available before updating host index */
	rte_compiler_barrier();

	/* Check in our reservation: wait for our turn to update the host index */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		_mm_pause();

	return 0;
}

int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
			uint32_t dlen, uint32_t flags)
{
	struct vmbus_chanpkt pkt;
	unsigned int pktlen, pad_pktlen;
	const uint32_t hlen = sizeof(pkt);
	uint64_t pad = 0;
	struct iovec iov[3];
	int error;

	pktlen = hlen + dlen;
	pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));

	pkt.hdr.type = type;
	pkt.hdr.flags = flags;
	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.xactid = VMBUS_RQST_ERROR;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = vmbus_txbr_write(txbr, iov, 3);

	return error;
}
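
/*
 * Illustrative note (not part of the original file), assuming
 * sizeof(struct vmbus_chanpkt) == 16 and VMBUS_CHANPKT_SIZE_SHIFT == 3:
 * for a payload of dlen = 6 bytes, pktlen = 22 and pad_pktlen = 24, so two
 * zero pad bytes follow the payload, hdr.hlen = 2 and hdr.tlen = 3 (both in
 * 8-byte units), and vmbus_txbr_write() consumes pad_pktlen + 8 = 32 bytes
 * of ring space including the trailing 64-bit offset.
 */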

static inline uint32_t
vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
		    void *dst0, size_t cplen)
{
	const uint8_t *br_data = rbr->vbr->data;
	uint32_t br_dsize = rbr->dsize;
	uint8_t *dst = dst0;

	if (cplen > br_dsize - rindex) {
		uint32_t fraglen = br_dsize - rindex;

		/* Wrap-around detected. */
		memcpy(dst, br_data + rindex, fraglen);
		memcpy(dst + fraglen, br_data, cplen - fraglen);
	} else {
		memcpy(dst, br_data + rindex, cplen);
	}

	return vmbus_br_idxinc(rindex, cplen, br_dsize);
}

/* Copy data from receive ring but don't change index */
static int
vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
{
	uint32_t avail;

	/*
	 * At least the requested data plus the 64-bit channel packet
	 * offset must be available.
	 */
	avail = vmbus_br_availread(rbr);
	if (avail < dlen + sizeof(uint64_t))
		return -EAGAIN;

	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
	return 0;
}

/*
 * Copy data from receive ring and change index.
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
 */
static int
vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
{
	struct vmbus_bufring *vbr = rbr->vbr;
	uint32_t br_dsize = rbr->dsize;
	uint32_t rindex;

	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
		return -EAGAIN;

	/* Record where host was when we started read (for debug) */
	rbr->windex = rbr->vbr->windex;

	/*
	 * Copy channel packet from RX bufring.
	 */
	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

	/*
	 * Discard this channel packet's 64-bit offset, which is useless to us.
	 */
	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);

	/* Update the read index _after_ the channel packet is fetched. */
	rte_compiler_barrier();

	vbr->rindex = rindex;

	return 0;
}

int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
			    void *data, uint32_t *len)
{
	struct vmbus_chanpkt_hdr pkt;
	uint32_t dlen, bufferlen = *len;
	int error;

	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
	if (error)
		return error;

	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
		/* XXX this channel is dead actually. */
		return -EIO;

	if (unlikely(pkt.hlen > pkt.tlen))
		return -EIO;

	/* Lengths are in units of 8 bytes (quadwords) */
	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
	*len = dlen;

	/* If the caller's buffer is not large enough */
	if (unlikely(dlen > bufferlen))
		return -ENOBUFS;

	/* Read the whole packet, including the channel packet header */
	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
	if (error)
		return error;

	/* Return the number of bytes consumed (packet plus 64-bit offset) */
	return dlen + sizeof(uint64_t);
}
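
#ifdef VMBUS_BUFRING_EXAMPLE
/*
 * Minimal usage sketch (illustrative only, not part of the original file).
 * It assumes the caller has already mapped the channel rings with
 * vmbus_uio_map() and initialized txbr/rxbr with vmbus_br_setup(); the
 * packet type value 0x6 corresponds to an inband data packet.
 */
static int example_echo_once(struct vmbus_br *txbr, struct vmbus_br *rxbr)
{
	uint8_t req[64] = { 0 };	/* payload to send (contents arbitrary here) */
	uint8_t resp[4096];
	uint32_t resp_len;
	int ret;

	/* Post one inband packet; -EAGAIN means the TX ring is currently full. */
	ret = rte_vmbus_chan_send(txbr, 0x6 /* inband */, req, sizeof(req), 0);
	if (ret)
		return ret;

	/*
	 * Poll the RX ring until a packet arrives (a real caller would
	 * normally sleep on the UIO interrupt fd instead of spinning).
	 */
	do {
		resp_len = sizeof(resp);
		ret = rte_vmbus_chan_recv_raw(rxbr, resp, &resp_len);
	} while (ret == -EAGAIN);

	/*
	 * On success, resp holds the full packet (header included), resp_len
	 * is its padded length, and ret is the bytes consumed from the ring.
	 */
	return ret < 0 ? ret : 0;
}
#endif /* VMBUS_BUFRING_EXAMPLE */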