1 /* 2 * Copyright (C) 2013-2016 Luigi Rizzo 3 * Copyright (C) 2013-2016 Giuseppe Lettieri 4 * Copyright (C) 2013-2016 Vincenzo Maffione 5 * Copyright (C) 2015 Stefano Garzarella 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #ifndef NETMAP_VIRT_H 33 #define NETMAP_VIRT_H 34 35 /* 36 * ptnetmap_memdev: device used to expose memory into the guest VM 37 * 38 * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the 39 * guest device driver. 40 */ 41 42 /* PCI identifiers and PCI BARs for the ptnetmap memdev 43 * and ptnetmap network interface. */ 44 #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev" 45 #define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */ 46 #define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */ 47 #define PTNETMAP_PCI_NETIF_ID 0x000d /* ptnet network interface */ 48 #define PTNETMAP_IO_PCI_BAR 0 49 #define PTNETMAP_MEM_PCI_BAR 1 50 #define PTNETMAP_MSIX_PCI_BAR 2 51 52 /* Registers for the ptnetmap memdev */ 53 #define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */ 54 #define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */ 55 #define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */ 56 #define PTNET_MDEV_IO_IF_POOL_OFS 64 57 #define PTNET_MDEV_IO_IF_POOL_OBJNUM 68 58 #define PTNET_MDEV_IO_IF_POOL_OBJSZ 72 59 #define PTNET_MDEV_IO_RING_POOL_OFS 76 60 #define PTNET_MDEV_IO_RING_POOL_OBJNUM 80 61 #define PTNET_MDEV_IO_RING_POOL_OBJSZ 84 62 #define PTNET_MDEV_IO_BUF_POOL_OFS 88 63 #define PTNET_MDEV_IO_BUF_POOL_OBJNUM 92 64 #define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96 65 #define PTNET_MDEV_IO_END 100 66 67 /* 68 * ptnetmap configuration 69 * 70 * The ptnet kthreads (running in host kernel-space) need to be configured 71 * in order to know how to intercept guest kicks (I/O register writes) and 72 * how to inject MSI-X interrupts to the guest. The configuration may vary 73 * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and 74 * and bhyve on FreeBSD. 75 * The configuration is passed by the hypervisor to the host netmap module 76 * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is 77 * specified by the ptnetmap_cfg struct. This struct contains an header 78 * with general informations and an array of entries whose size depends 79 * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every 80 * time the kthreads are started. 81 */ 82 struct ptnetmap_cfg { 83 #define PTNETMAP_CFGTYPE_QEMU 0x1 84 #define PTNETMAP_CFGTYPE_BHYVE 0x2 85 uint16_t cfgtype; /* how to interpret the cfg entries */ 86 uint16_t entry_size; /* size of a config entry */ 87 uint32_t num_rings; /* number of config entries */ 88 void *csb_gh; /* CSB for guest --> host communication */ 89 void *csb_hg; /* CSB for host --> guest communication */ 90 /* Configuration entries are allocated right after the struct. */ 91 }; 92 93 /* Configuration of a ptnetmap ring for QEMU. */ 94 struct ptnetmap_cfgentry_qemu { 95 uint32_t ioeventfd; /* to intercept guest register access */ 96 uint32_t irqfd; /* to inject guest interrupts */ 97 }; 98 99 /* Configuration of a ptnetmap ring for bhyve. */ 100 struct ptnetmap_cfgentry_bhyve { 101 uint64_t wchan; /* tsleep() parameter, to wake up kthread */ 102 uint32_t ioctl_fd; /* ioctl fd */ 103 /* ioctl parameters to send irq */ 104 uint32_t ioctl_cmd; 105 /* vmm.ko MSIX parameters for IOCTL */ 106 struct { 107 uint64_t msg_data; 108 uint64_t addr; 109 } ioctl_data; 110 }; 111 112 /* 113 * Structure filled-in by the kernel when asked for allocator info 114 * through NETMAP_POOLS_INFO_GET. Used by hypervisors supporting 115 * ptnetmap. 116 */ 117 struct netmap_pools_info { 118 uint64_t memsize; /* same as nmr->nr_memsize */ 119 uint32_t memid; /* same as nmr->nr_arg2 */ 120 uint32_t if_pool_offset; 121 uint32_t if_pool_objtotal; 122 uint32_t if_pool_objsize; 123 uint32_t ring_pool_offset; 124 uint32_t ring_pool_objtotal; 125 uint32_t ring_pool_objsize; 126 uint32_t buf_pool_offset; 127 uint32_t buf_pool_objtotal; 128 uint32_t buf_pool_objsize; 129 }; 130 131 /* 132 * Pass a pointer to a userspace buffer to be passed to kernelspace for write 133 * or read. Used by NETMAP_PT_HOST_CREATE and NETMAP_POOLS_INFO_GET. 134 */ 135 static inline void 136 nmreq_pointer_put(struct nmreq *nmr, void *userptr) 137 { 138 uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; 139 *pp = (uintptr_t)userptr; 140 } 141 142 static inline void * 143 nmreq_pointer_get(const struct nmreq *nmr) 144 { 145 const uintptr_t * pp = (const uintptr_t *)&nmr->nr_arg1; 146 return (void *)*pp; 147 } 148 149 /* ptnetmap features */ 150 #define PTNETMAP_F_VNET_HDR 1 151 152 /* I/O registers for the ptnet device. */ 153 #define PTNET_IO_PTFEAT 0 154 #define PTNET_IO_PTCTL 4 155 #define PTNET_IO_MAC_LO 8 156 #define PTNET_IO_MAC_HI 12 157 #define PTNET_IO_CSBBAH 16 /* deprecated */ 158 #define PTNET_IO_CSBBAL 20 /* deprecated */ 159 #define PTNET_IO_NIFP_OFS 24 160 #define PTNET_IO_NUM_TX_RINGS 28 161 #define PTNET_IO_NUM_RX_RINGS 32 162 #define PTNET_IO_NUM_TX_SLOTS 36 163 #define PTNET_IO_NUM_RX_SLOTS 40 164 #define PTNET_IO_VNET_HDR_LEN 44 165 #define PTNET_IO_HOSTMEMID 48 166 #define PTNET_IO_CSB_GH_BAH 52 167 #define PTNET_IO_CSB_GH_BAL 56 168 #define PTNET_IO_CSB_HG_BAH 60 169 #define PTNET_IO_CSB_HG_BAL 64 170 #define PTNET_IO_END 68 171 #define PTNET_IO_KICK_BASE 128 172 #define PTNET_IO_MASK 0xff 173 174 /* ptnetmap control commands (values for PTCTL register) */ 175 #define PTNETMAP_PTCTL_CREATE 1 176 #define PTNETMAP_PTCTL_DELETE 2 177 178 /* ptnetmap synchronization variables shared between guest and host */ 179 struct ptnet_csb_gh { 180 uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */ 181 uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */ 182 uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */ 183 uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */ 184 char pad[48]; /* pad to a 64 bytes cacheline */ 185 }; 186 struct ptnet_csb_hg { 187 uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */ 188 uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */ 189 uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */ 190 char pad[4+48]; 191 }; 192 193 #ifdef WITH_PTNETMAP_GUEST 194 195 /* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */ 196 struct ptnetmap_memdev; 197 int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **, 198 uint64_t *); 199 void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); 200 uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int); 201 202 /* Guest driver: Write kring pointers (cur, head) to the CSB. 203 * This routine is coupled with ptnetmap_host_read_kring_csb(). */ 204 static inline void 205 ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur, 206 uint32_t head) 207 { 208 /* 209 * We need to write cur and head to the CSB but we cannot do it atomically. 210 * There is no way we can prevent the host from reading the updated value 211 * of one of the two and the old value of the other. However, if we make 212 * sure that the host never reads a value of head more recent than the 213 * value of cur we are safe. We can allow the host to read a value of cur 214 * more recent than the value of head, since in the netmap ring cur can be 215 * ahead of head and cur cannot wrap around head because it must be behind 216 * tail. Inverting the order of writes below could instead result into the 217 * host to think head went ahead of cur, which would cause the sync 218 * prologue to fail. 219 * 220 * The following memory barrier scheme is used to make this happen: 221 * 222 * Guest Host 223 * 224 * STORE(cur) LOAD(head) 225 * mb() <-----------> mb() 226 * STORE(head) LOAD(cur) 227 */ 228 ptr->cur = cur; 229 mb(); 230 ptr->head = head; 231 } 232 233 /* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB. 234 * This routine is coupled with ptnetmap_host_write_kring_csb(). */ 235 static inline void 236 ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring) 237 { 238 /* 239 * We place a memory barrier to make sure that the update of hwtail never 240 * overtakes the update of hwcur. 241 * (see explanation in ptnetmap_host_write_kring_csb). 242 */ 243 kring->nr_hwtail = pthg->hwtail; 244 mb(); 245 kring->nr_hwcur = pthg->hwcur; 246 } 247 248 #endif /* WITH_PTNETMAP_GUEST */ 249 250 #ifdef WITH_PTNETMAP_HOST 251 /* 252 * ptnetmap kernel thread routines 253 * */ 254 255 /* Functions to read and write CSB fields in the host */ 256 #if defined (linux) 257 #define CSB_READ(csb, field, r) (get_user(r, &csb->field)) 258 #define CSB_WRITE(csb, field, v) (put_user(v, &csb->field)) 259 #else /* ! linux */ 260 #define CSB_READ(csb, field, r) (r = fuword32(&csb->field)) 261 #define CSB_WRITE(csb, field, v) (suword32(&csb->field, v)) 262 #endif /* ! linux */ 263 264 /* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB. 265 * This routine is coupled with ptnetmap_guest_read_kring_csb(). */ 266 static inline void 267 ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur, 268 uint32_t hwtail) 269 { 270 /* 271 * The same scheme used in ptnetmap_guest_write_kring_csb() applies here. 272 * We allow the guest to read a value of hwcur more recent than the value 273 * of hwtail, since this would anyway result in a consistent view of the 274 * ring state (and hwcur can never wraparound hwtail, since hwcur must be 275 * behind head). 276 * 277 * The following memory barrier scheme is used to make this happen: 278 * 279 * Guest Host 280 * 281 * STORE(hwcur) LOAD(hwtail) 282 * mb() <-------------> mb() 283 * STORE(hwtail) LOAD(hwcur) 284 */ 285 CSB_WRITE(ptr, hwcur, hwcur); 286 mb(); 287 CSB_WRITE(ptr, hwtail, hwtail); 288 } 289 290 /* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB. 291 * This routine is coupled with ptnetmap_guest_write_kring_csb(). */ 292 static inline void 293 ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr, 294 struct netmap_ring *shadow_ring, 295 uint32_t num_slots) 296 { 297 /* 298 * We place a memory barrier to make sure that the update of head never 299 * overtakes the update of cur. 300 * (see explanation in ptnetmap_guest_write_kring_csb). 301 */ 302 CSB_READ(ptr, head, shadow_ring->head); 303 mb(); 304 CSB_READ(ptr, cur, shadow_ring->cur); 305 CSB_READ(ptr, sync_flags, shadow_ring->flags); 306 } 307 308 #endif /* WITH_PTNETMAP_HOST */ 309 310 #endif /* NETMAP_VIRT_H */ 311