1 /* 2 * Copyright (C) 2013-2016 Luigi Rizzo 3 * Copyright (C) 2013-2016 Giuseppe Lettieri 4 * Copyright (C) 2013-2016 Vincenzo Maffione 5 * Copyright (C) 2015 Stefano Garzarella 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #ifndef NETMAP_VIRT_H 33 #define NETMAP_VIRT_H 34 35 #define NETMAP_VIRT_CSB_SIZE 4096 36 37 /* ptnetmap features */ 38 #define PTNETMAP_F_BASE 1 39 #define PTNETMAP_F_FULL 2 /* not used */ 40 #define PTNETMAP_F_VNET_HDR 4 41 42 /* 43 * ptnetmap_memdev: device used to expose memory into the guest VM 44 * 45 * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the 46 * guest device driver. 
47 */ 48 49 /* PCI identifiers and PCI BARs for the ptnetmap memdev 50 * and ptnetmap network interface. */ 51 #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev" 52 #define PTNETMAP_PCI_VENDOR_ID 0x3333 /* TODO change vendor_id */ 53 #define PTNETMAP_PCI_DEVICE_ID 0x0001 /* memory device */ 54 #define PTNETMAP_PCI_NETIF_ID 0x0002 /* ptnet network interface */ 55 #define PTNETMAP_IO_PCI_BAR 0 56 #define PTNETMAP_MEM_PCI_BAR 1 57 #define PTNETMAP_MSIX_PCI_BAR 2 58 59 /* Registers for the ptnetmap memdev */ 60 /* 32 bit r/o */ 61 #define PTNETMAP_IO_PCI_FEATURES 0 /* XXX should be removed */ 62 /* 32 bit r/o */ 63 #define PTNETMAP_IO_PCI_MEMSIZE 4 /* size of the netmap memory shared 64 * between guest and host */ 65 /* 16 bit r/o */ 66 #define PTNETMAP_IO_PCI_HOSTID 8 /* memory allocator ID in netmap host */ 67 #define PTNETMAP_IO_SIZE 10 68 69 /* 70 * ptnetmap configuration 71 * 72 * The hypervisor (QEMU or bhyve) sends this struct to the host netmap 73 * module through an ioctl() command when it wants to start the ptnetmap 74 * kthreads. 75 */ 76 struct ptnetmap_cfg { 77 #define PTNETMAP_CFG_FEAT_CSB 0x0001 78 #define PTNETMAP_CFG_FEAT_EVENTFD 0x0002 79 #define PTNETMAP_CFG_FEAT_IOCTL 0x0004 80 uint32_t features; 81 void *ptrings; /* ptrings inside CSB */ 82 uint32_t num_rings; /* number of entries */ 83 struct ptnet_ring_cfg entries[0]; /* per-ptring configuration */ 84 }; 85 86 /* 87 * Functions used to write ptnetmap_cfg from/to the nmreq. 88 * The user-space application writes the pointer of ptnetmap_cfg 89 * (user-space buffer) starting from nr_arg1 field, so that the kernel 90 * can read it with copyin (copy_from_user). 
 */

/*
 * Store the user-space pointer to a struct ptnetmap_cfg into the nmreq
 * argument area, so the kernel side can retrieve it with
 * copyin()/copy_from_user().
 *
 * NOTE(review): the pointer is written starting at nr_arg1, so it
 * overlays whatever fields follow nr_arg1 in struct nmreq; assumes the
 * nmreq layout reserves enough contiguous room for a uintptr_t there --
 * confirm against the struct nmreq definition.
 */
static inline void
ptnetmap_write_cfg(struct nmreq *nmr, struct ptnetmap_cfg *cfg)
{
	uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1;
	*nmr_ptncfg = (uintptr_t)cfg;
}

/* ptnetmap control commands (values written to the PTNET_IO_PTCTL
 * register below). */
#define PTNETMAP_PTCTL_CONFIG           1
#define PTNETMAP_PTCTL_FINALIZE         2
#define PTNETMAP_PTCTL_IFNEW            3
#define PTNETMAP_PTCTL_IFDELETE         4
#define PTNETMAP_PTCTL_RINGSCREATE      5
#define PTNETMAP_PTCTL_RINGSDELETE      6
#define PTNETMAP_PTCTL_DEREF            7
#define PTNETMAP_PTCTL_TXSYNC           8
#define PTNETMAP_PTCTL_RXSYNC           9
#define PTNETMAP_PTCTL_REGIF            10
#define PTNETMAP_PTCTL_UNREGIF          11
#define PTNETMAP_PTCTL_HOSTMEMID        12


/* I/O registers for the ptnet device. */
#define PTNET_IO_PTFEAT         0
#define PTNET_IO_PTCTL          4
#define PTNET_IO_PTSTS          8
/* hole */
#define PTNET_IO_MAC_LO         16
#define PTNET_IO_MAC_HI         20
#define PTNET_IO_CSBBAH         24
#define PTNET_IO_CSBBAL         28
#define PTNET_IO_NIFP_OFS       32
#define PTNET_IO_NUM_TX_RINGS   36
#define PTNET_IO_NUM_RX_RINGS   40
#define PTNET_IO_NUM_TX_SLOTS   44
#define PTNET_IO_NUM_RX_SLOTS   48
#define PTNET_IO_VNET_HDR_LEN   52
#define PTNET_IO_END            56
#define PTNET_IO_KICK_BASE      128
#define PTNET_IO_MASK           0xff

/* If defined, CSB is allocated by the guest, not by the host. */
#define PTNET_CSB_ALLOC

/* ptnetmap ring fields shared between guest and host */
struct ptnet_ring {
	/* XXX revise the layout to minimize cache bounces.
	 */
	uint32_t head;		  /* GW+ HR+ the head of the guest netmap_ring */
	uint32_t cur;		  /* GW+ HR+ the cur of the guest netmap_ring */
	uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
	char pad[4];		  /* NOTE(review): looks like alignment/layout
				   * padding between the guest-written and
				   * host-written halves -- confirm */
	uint32_t hwcur;		  /* GR+ HW+ the hwcur of the host netmap_kring */
	uint32_t hwtail;	  /* GR+ HW+ the hwtail of the host netmap_kring */
	uint32_t host_need_kick;  /* GR+ HW+ guest-->host notification enable */
	uint32_t sync_flags;	  /* GW+ HR+ the flags of the guest [tx|rx]sync() */
};

/* CSB for the ptnet device: as many ptnet_ring entries as fit in
 * NETMAP_VIRT_CSB_SIZE bytes. */
struct ptnet_csb {
	struct ptnet_ring rings[NETMAP_VIRT_CSB_SIZE/sizeof(struct ptnet_ring)];
};

#if defined (WITH_PTNETMAP_HOST) || defined (WITH_PTNETMAP_GUEST)

/* Return l_elem - r_elem with wraparound over a ring of num_slots
 * entries (both operands are assumed to be valid slot indices). */
static inline uint32_t
ptn_sub(uint32_t l_elem, uint32_t r_elem, uint32_t num_slots)
{
	int64_t res;

	/* Widen to 64 bits so the subtraction cannot wrap before the
	 * sign check below. */
	res = (int64_t)(l_elem) - r_elem;

	return (res < 0) ? res + num_slots : res;
}
#endif /* WITH_PTNETMAP_HOST || WITH_PTNETMAP_GUEST */

#ifdef WITH_PTNETMAP_HOST
/*
 * ptnetmap kernel thread routines
 */

/* Accessors to read and write CSB fields from the host: the CSB lives
 * in guest (user) memory, so plain dereferences are not allowed.
 * Linux provides get_user()/put_user(); other systems use
 * fuword32()/suword32(). */
#if defined (linux)
#define CSB_READ(csb, field, r) (get_user(r, &csb->field))
#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
#else /* ! linux */
#define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
#endif /* !
linux */

/*
 * HOST side: read/write kring pointers from/to the CSB.
 */

/* Host: Read kring pointers (head, cur, sync_flags) from the CSB into
 * the guest-visible netmap_ring. */
static inline void
ptnetmap_host_read_kring_csb(struct ptnet_ring __user *ptr,
			     struct netmap_ring *g_ring,
			     uint32_t num_slots)
{
	uint32_t old_head = g_ring->head, old_cur = g_ring->cur;
	uint32_t d, inc_h, inc_c;

	//mb(); /* Force memory complete before read CSB */

	/*
	 * We must first read head and then cur with a barrier in the
	 * middle, because cur can exceed head, but not vice versa.
	 * The guest must first write cur and then head with a barrier.
	 *
	 *     head <= cur
	 *
	 *     guest              host
	 *
	 *     STORE(cur)         LOAD(head)
	 *       mb() ----------    mb()
	 *     STORE(head)        LOAD(cur)
	 *
	 * This approach ensures that every head that we read is
	 * associated with the correct cur. In this way head can not
	 * exceed cur.
	 */
	CSB_READ(ptr, head, g_ring->head);
	mb();
	CSB_READ(ptr, cur, g_ring->cur);
	CSB_READ(ptr, sync_flags, g_ring->flags);

	/*
	 * Even with the previous barrier, it is still possible that we read an
	 * updated cur and an old head.
	 * To detect this situation, we can check if the new cur overtakes
	 * the (apparently) new head.
	 */
	d = ptn_sub(old_cur, old_head, num_slots);          /* previous distance */
	inc_c = ptn_sub(g_ring->cur, old_cur, num_slots);   /* increase of cur */
	inc_h = ptn_sub(g_ring->head, old_head, num_slots); /* increase of head */

	if (unlikely(inc_c > num_slots - d + inc_h)) { /* cur overtakes head */
		ND(1,"ERROR cur overtakes head - old_cur: %u cur: %u old_head: %u head: %u",
		   old_cur, g_ring->cur, old_head, g_ring->head);
		/* Recover by moving cur back next to head. */
		g_ring->cur = nm_prev(g_ring->head, num_slots - 1);
		//*g_cur = *g_head;
	}
}

/* Host: Write kring pointers (hwcur, hwtail) into the CSB */
static inline void
ptnetmap_host_write_kring_csb(struct ptnet_ring __user *ptr, uint32_t hwcur,
			      uint32_t hwtail)
{
	/* We must write hwtail before hwcur, mirroring the barrier-ordered
	 * reads done by the guest in ptnetmap_guest_read_kring_csb(). */
	CSB_WRITE(ptr, hwtail, hwtail);
	mb();
	CSB_WRITE(ptr, hwcur, hwcur);

	//mb(); /* Force memory complete before send notification */
}

#endif /* WITH_PTNETMAP_HOST */

#ifdef WITH_PTNETMAP_GUEST
/*
 * GUEST side: read/write kring pointers from/to the CSB.
 * For use in the guest device driver.
 */

/* Guest: Write kring pointers (cur, head) into the CSB */
static inline void
ptnetmap_guest_write_kring_csb(struct ptnet_ring *ptr, uint32_t cur,
			       uint32_t head)
{
	/* We must write cur before head, mirroring the barrier-ordered
	 * reads done by the host in ptnetmap_host_read_kring_csb(). */
	ptr->cur = cur;
	mb();
	ptr->head = head;

	//mb(); /* Force memory complete before send notification */
}

/* Guest: Read kring pointers (hwcur, hwtail) from the CSB into the
 * guest netmap_kring. */
static inline void
ptnetmap_guest_read_kring_csb(struct ptnet_ring *ptr, struct netmap_kring *kring)
{
	uint32_t old_hwcur = kring->nr_hwcur, old_hwtail = kring->nr_hwtail;
	uint32_t num_slots = kring->nkr_num_slots;
	uint32_t d, inc_hc, inc_ht;

	//mb(); /* Force memory complete before read CSB */

	/*
	 * We must first read hwcur and then hwtail with a barrier in the
	 * middle, because hwtail can exceed hwcur, but not vice versa.
	 * The host must first write hwtail and then hwcur with a barrier.
	 *
	 *     hwcur <= hwtail
	 *
	 *     host               guest
	 *
	 *     STORE(hwtail)      LOAD(hwcur)
	 *       mb() ---------     mb()
	 *     STORE(hwcur)       LOAD(hwtail)
	 *
	 * This approach ensures that every hwcur that the guest reads is
	 * associated with the correct hwtail. In this way hwcur can not exceed
	 * hwtail.
	 */
	kring->nr_hwcur = ptr->hwcur;
	mb();
	kring->nr_hwtail = ptr->hwtail;

	/*
	 * Even with the previous barrier, it is still possible that we read an
	 * updated hwtail and an old hwcur.
	 * To detect this situation, we can check if the new hwtail overtakes
	 * the (apparently) new hwcur.
	 */
	d = ptn_sub(old_hwtail, old_hwcur, num_slots);             /* previous distance */
	inc_ht = ptn_sub(kring->nr_hwtail, old_hwtail, num_slots); /* increase of hwtail */
	inc_hc = ptn_sub(kring->nr_hwcur, old_hwcur, num_slots);   /* increase of hwcur */

	if (unlikely(inc_ht > num_slots - d + inc_hc)) { /* hwtail overtakes hwcur */
		ND(1, "ERROR hwtail overtakes hwcur - old_hwtail: %u hwtail: %u old_hwcur: %u hwcur: %u",
		   old_hwtail, kring->nr_hwtail, old_hwcur, kring->nr_hwcur);
		/* Recover by moving hwtail back next to hwcur. */
		kring->nr_hwtail = nm_prev(kring->nr_hwcur, num_slots - 1);
		//kring->nr_hwtail = kring->nr_hwcur;
	}
}

/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */
struct ptnetmap_memdev;
int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **);
void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
#endif /* WITH_PTNETMAP_GUEST */

#endif /* NETMAP_VIRT_H */