xref: /freebsd/sys/net/netmap_virt.h (revision 0629b152762b06325dd75a41bcb0a2789514141b)
1 /*
2  * Copyright (C) 2013-2016 Luigi Rizzo
3  * Copyright (C) 2013-2016 Giuseppe Lettieri
4  * Copyright (C) 2013-2016 Vincenzo Maffione
5  * Copyright (C) 2015 Stefano Garzarella
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *   1. Redistributions of source code must retain the above copyright
12  *      notice, this list of conditions and the following disclaimer.
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #ifndef NETMAP_VIRT_H
33 #define NETMAP_VIRT_H
34 
35 /*
36  * ptnetmap_memdev: device used to expose memory into the guest VM
37  *
38  * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
39  * guest device driver.
40  */
41 
/*
 * PCI identification of the ptnetmap memdev and of the ptnetmap network
 * interface, followed by the index of each PCI BAR.
 */
#define PTNETMAP_MEMDEV_NAME	"ptnetmap-memdev"

#define PTNETMAP_PCI_VENDOR_ID	0x1b36	/* vendor: QEMU virtual devices */
#define PTNETMAP_PCI_DEVICE_ID	0x000c	/* device: memory device */
#define PTNETMAP_PCI_NETIF_ID	0x000d	/* device: ptnet network interface */

#define PTNETMAP_IO_PCI_BAR	0
#define PTNETMAP_MEM_PCI_BAR	1
#define PTNETMAP_MSIX_PCI_BAR	2
51 
/*
 * Register offsets of the ptnetmap memdev.
 *
 * The general registers sit at offsets 0-8; the three groups of pool
 * registers (if, ring, buf) start at offset 64 and PTNET_MDEV_IO_END
 * marks the first offset past the last register.
 */

/* General registers. */
#define PTNET_MDEV_IO_MEMSIZE_LO	0	/* netmap memory size (low) */
#define PTNET_MDEV_IO_MEMSIZE_HI	4	/* netmap memory size (high) */
#define PTNET_MDEV_IO_MEMID		8	/* memory allocator ID in the host */

/* "if" pool registers. */
#define PTNET_MDEV_IO_IF_POOL_OFS	64
#define PTNET_MDEV_IO_IF_POOL_OBJNUM	68
#define PTNET_MDEV_IO_IF_POOL_OBJSZ	72

/* "ring" pool registers. */
#define PTNET_MDEV_IO_RING_POOL_OFS	76
#define PTNET_MDEV_IO_RING_POOL_OBJNUM	80
#define PTNET_MDEV_IO_RING_POOL_OBJSZ	84

/* "buf" pool registers. */
#define PTNET_MDEV_IO_BUF_POOL_OFS	88
#define PTNET_MDEV_IO_BUF_POOL_OBJNUM	92
#define PTNET_MDEV_IO_BUF_POOL_OBJSZ	96

#define PTNET_MDEV_IO_END		100
66 
/*
 * ptnetmap configuration
 *
 * The ptnet kthreads run in host kernel-space. They must be told how to
 * intercept guest kicks (I/O register writes) and how to inject MSI-X
 * interrupts into the guest, and this may vary with the hypervisor.
 * Currently, QEMU/KVM on Linux and bhyve on FreeBSD are supported.
 *
 * The hypervisor passes the configuration to the host netmap module by
 * means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE. The
 * configuration is described by struct ptnetmap_cfg: a header with
 * general information, followed by an array of entries whose size
 * depends on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued
 * every time the kthreads are started.
 */

/* Values for ptnetmap_cfg.cfgtype. */
#define PTNETMAP_CFGTYPE_QEMU		0x1
#define PTNETMAP_CFGTYPE_BHYVE		0x2

struct ptnetmap_cfg {
	uint16_t cfgtype;	/* tells how the cfg entries are interpreted */
	uint16_t entry_size;	/* size of one config entry */
	uint32_t num_rings;	/* number of config entries */
	void *csb_gh;		/* CSB for guest --> host communication */
	void *csb_hg;		/* CSB for host --> guest communication */
	/* The configuration entries are allocated right after this struct. */
};
92 
/*
 * Per-ring ptnetmap configuration entry for QEMU.
 */
struct ptnetmap_cfgentry_qemu {
	/* Used to intercept guest register accesses. */
	uint32_t ioeventfd;
	/* Used to inject interrupts into the guest. */
	uint32_t irqfd;
};
98 
/*
 * Per-ring ptnetmap configuration entry for bhyve.
 */
struct ptnetmap_cfgentry_bhyve {
	/* tsleep() parameter, used to wake up the kthread. */
	uint64_t wchan;
	/* File descriptor the ioctl is issued on. */
	uint32_t ioctl_fd;
	/* ioctl command used to send an irq. */
	uint32_t ioctl_cmd;
	/* vmm.ko MSI-X parameters passed with that ioctl. */
	struct {
		uint64_t msg_data;
		uint64_t addr;
	} ioctl_data;
};
111 
/*
 * Allocator description returned by the kernel in answer to
 * NETMAP_POOLS_INFO_GET. Hypervisors that support ptnetmap consume it.
 *
 * After the two general fields come three groups of identical shape
 * (offset, objtotal, objsize), one for each of the if, ring and buf pools.
 */
struct netmap_pools_info {
	/* General allocator data. */
	uint64_t memsize;	/* same as nmr->nr_memsize */
	uint32_t memid;		/* same as nmr->nr_arg2 */

	/* "if" pool. */
	uint32_t if_pool_offset;
	uint32_t if_pool_objtotal;
	uint32_t if_pool_objsize;

	/* "ring" pool. */
	uint32_t ring_pool_offset;
	uint32_t ring_pool_objtotal;
	uint32_t ring_pool_objsize;

	/* "buf" pool. */
	uint32_t buf_pool_offset;
	uint32_t buf_pool_objtotal;
	uint32_t buf_pool_objsize;
};
130 
131 /*
132  * Pass a pointer to a userspace buffer to be passed to kernelspace for write
133  * or read. Used by NETMAP_PT_HOST_CREATE and NETMAP_POOLS_INFO_GET.
134  */
135 static inline void
136 nmreq_pointer_put(struct nmreq *nmr, void *userptr)
137 {
138 	uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
139 	*pp = (uintptr_t)userptr;
140 }
141 
142 static inline void *
143 nmreq_pointer_get(const struct nmreq *nmr)
144 {
145 	const uintptr_t * pp = (const uintptr_t *)&nmr->nr_arg1;
146 	return (void *)*pp;
147 }
148 
/* ptnetmap feature bits. */
#define PTNETMAP_F_VNET_HDR	1

/*
 * I/O register offsets of the ptnet device.
 *
 * The registers listed below end at PTNET_IO_END; PTNET_IO_KICK_BASE and
 * PTNET_IO_MASK are defined separately at the bottom.
 */
#define PTNET_IO_PTFEAT		0
#define PTNET_IO_PTCTL		4
#define PTNET_IO_MAC_LO		8
#define PTNET_IO_MAC_HI		12
#define PTNET_IO_CSBBAH		16	/* deprecated */
#define PTNET_IO_CSBBAL		20	/* deprecated */
#define PTNET_IO_NIFP_OFS	24
#define PTNET_IO_NUM_TX_RINGS	28
#define PTNET_IO_NUM_RX_RINGS	32
#define PTNET_IO_NUM_TX_SLOTS	36
#define PTNET_IO_NUM_RX_SLOTS	40
#define PTNET_IO_VNET_HDR_LEN	44
#define PTNET_IO_HOSTMEMID	48
#define PTNET_IO_CSB_GH_BAH	52
#define PTNET_IO_CSB_GH_BAL	56
#define PTNET_IO_CSB_HG_BAH	60
#define PTNET_IO_CSB_HG_BAL	64
#define PTNET_IO_END		68

#define PTNET_IO_KICK_BASE	128
#define PTNET_IO_MASK		0xff

/* ptnetmap control commands: the values written to the PTCTL register. */
#define PTNETMAP_PTCTL_CREATE	1
#define PTNETMAP_PTCTL_DELETE	2
177 
/*
 * ptnetmap synchronization variables shared between guest and host:
 * the guest --> host part. Every field is tagged GW+ HR+.
 */
struct ptnet_csb_gh {
	uint32_t head;		  /* GW+ HR+ head of the guest netmap_ring */
	uint32_t cur;		  /* GW+ HR+ cur of the guest netmap_ring */
	uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
	uint32_t sync_flags;	  /* GW+ HR+ flags of the guest [tx|rx]sync() */
	/* Four 32-bit fields above; pad the struct to a 64-byte cacheline. */
	char pad[64 - 4 * sizeof(uint32_t)];
};
/*
 * ptnetmap synchronization variables shared between guest and host:
 * the host --> guest part. Every field is tagged GR+ HW+.
 */
struct ptnet_csb_hg {
	uint32_t hwcur;		  /* GR+ HW+ hwcur of the host netmap_kring */
	uint32_t hwtail;	  /* GR+ HW+ hwtail of the host netmap_kring */
	uint32_t host_need_kick;  /* GR+ HW+ guest-->host notification enable */
	/* Three 32-bit fields above; pad the struct to 64 bytes. */
	char pad[64 - 3 * sizeof(uint32_t)];
};
192 
193 #ifdef WITH_PTNETMAP_GUEST
194 
/*
 * ptnetmap_memdev routines used to talk with the ptnetmap_memdev device
 * driver. Only the prototypes are given here; struct ptnetmap_memdev is
 * forward-declared and used as an opaque handle.
 *
 * NOTE(review): the nm_os_ prefix suggests the definitions live in
 * OS-specific code -- confirm.
 * NOTE(review): the unsigned int argument of nm_os_pt_memdev_ioread() is
 * presumably one of the PTNET_MDEV_IO_* register offsets -- confirm.
 */
struct ptnetmap_memdev;
int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
                          uint64_t *);
void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
201 
/* Guest driver: Write kring pointers (cur, head) to the CSB.
 * This routine is coupled with ptnetmap_host_read_kring_csb().
 *
 * ptr:  guest --> host CSB entry that receives the two values
 * cur:  stored into ptr->cur, before the memory barrier
 * head: stored into ptr->head, after the memory barrier
 */
static inline void
ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur,
			       uint32_t head)
{
    /*
     * We need to write cur and head to the CSB but we cannot do it atomically.
     * There is no way we can prevent the host from reading the updated value
     * of one of the two and the old value of the other. However, if we make
     * sure that the host never reads a value of head more recent than the
     * value of cur we are safe. We can allow the host to read a value of cur
     * more recent than the value of head, since in the netmap ring cur can be
     * ahead of head and cur cannot wrap around head because it must be behind
     * tail. Inverting the order of the writes below could instead lead the
     * host to think that head went ahead of cur, which would cause the sync
     * prologue to fail.
     *
     * The following memory barrier scheme is used to make this happen:
     *
     *          Guest              Host
     *
     *          STORE(cur)         LOAD(head)
     *          mb() <-----------> mb()
     *          STORE(head)        LOAD(cur)
     */
    ptr->cur = cur;
    /* Keep the store to cur ahead of the store to head. */
    mb();
    ptr->head = head;
}
232 
/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
 * This routine is coupled with ptnetmap_host_write_kring_csb().
 *
 * pthg:  host --> guest CSB entry to read from
 * kring: kring whose nr_hwtail and nr_hwcur are overwritten with the
 *        values read
 */
static inline void
ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
{
    /*
     * We place a memory barrier to make sure that the update of hwtail never
     * overtakes the update of hwcur.
     * (see explanation in ptnetmap_host_write_kring_csb).
     *
     * hwtail is loaded first and hwcur second, i.e. in the opposite order
     * with respect to the stores done by ptnetmap_host_write_kring_csb().
     */
    kring->nr_hwtail = pthg->hwtail;
    mb();
    kring->nr_hwcur = pthg->hwcur;
}
247 
248 #endif /* WITH_PTNETMAP_GUEST */
249 
250 #ifdef WITH_PTNETMAP_HOST
251 /*
252  * ptnetmap kernel thread routines
253  * */
254 
/*
 * Macros to read and write CSB fields in the host.
 *
 *   CSB_READ(csb, field, r)   loads csb->field into r
 *   CSB_WRITE(csb, field, v)  stores v into csb->field
 *
 * The csb argument is parenthesized in the expansion, so any pointer
 * expression (e.g. "base + i") can be passed, not just a plain identifier.
 *
 * The two variants do not evaluate to the same thing. With "linux"
 * defined the macro value is whatever get_user()/put_user() return;
 * otherwise CSB_READ evaluates to the value assigned to r and CSB_WRITE
 * to whatever suword32() returns.
 * NOTE(review): in the non-linux CSB_READ the result of fuword32() goes
 * straight into r, so a failed fetch presumably cannot be told apart from
 * fetched data -- confirm against fuword32() semantics.
 */
#if defined (linux)
#define CSB_READ(csb, field, r) (get_user(r, &(csb)->field))
#define CSB_WRITE(csb, field, v) (put_user(v, &(csb)->field))
#else  /* ! linux */
#define CSB_READ(csb, field, r) ((r) = fuword32(&(csb)->field))
#define CSB_WRITE(csb, field, v) (suword32(&(csb)->field, v))
#endif /* ! linux */
263 
/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb().
 *
 * ptr:    host --> guest CSB entry, written through CSB_WRITE()
 * hwcur:  stored into ptr->hwcur, before the memory barrier
 * hwtail: stored into ptr->hwtail, after the memory barrier
 *
 * The values returned by the CSB_WRITE() accesses are not checked.
 */
static inline void
ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur,
        uint32_t hwtail)
{
    /*
     * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
     * We allow the guest to read a value of hwcur more recent than the value
     * of hwtail, since this would anyway result in a consistent view of the
     * ring state (and hwcur can never wraparound hwtail, since hwcur must be
     * behind head).
     *
     * The following memory barrier scheme is used to make this happen
     * (here the host is the writer and the guest is the reader):
     *
     *          Host                 Guest
     *
     *          STORE(hwcur)         LOAD(hwtail)
     *          mb() <-------------> mb()
     *          STORE(hwtail)        LOAD(hwcur)
     */
    CSB_WRITE(ptr, hwcur, hwcur);
    mb();
    CSB_WRITE(ptr, hwtail, hwtail);
}
289 
/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb().
 *
 * ptr:         guest --> host CSB entry, read through CSB_READ()
 * shadow_ring: ring whose head, cur and flags fields are overwritten with
 *              the values read
 * num_slots:   not used by this function
 *
 * The values returned by the CSB_READ() accesses are not checked.
 */
static inline void
ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr,
			     struct netmap_ring *shadow_ring,
			     uint32_t num_slots)
{
    /*
     * We place a memory barrier to make sure that the update of head never
     * overtakes the update of cur.
     * (see explanation in ptnetmap_guest_write_kring_csb).
     *
     * head is loaded first and cur second, i.e. in the opposite order with
     * respect to the stores done by ptnetmap_guest_write_kring_csb().
     */
    CSB_READ(ptr, head, shadow_ring->head);
    mb();
    CSB_READ(ptr, cur, shadow_ring->cur);
    /* sync_flags is read last, with no further barrier after cur. */
    CSB_READ(ptr, sync_flags, shadow_ring->flags);
}
307 
308 #endif /* WITH_PTNETMAP_HOST */
309 
310 #endif /* NETMAP_VIRT_H */
311